472 lines
12 KiB
C++
472 lines
12 KiB
C++
/*
|
|
* Copyright © 2018 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#undef NDEBUG
|
|
|
|
#include <gtest/gtest.h>
|
|
#include "util/bigmath.h"
|
|
#include "util/fast_idiv_by_const.h"
|
|
#include "util/u_math.h"
|
|
|
|
#define RAND_TEST_ITERATIONS 100000
|
|
|
|
static inline uint64_t
|
|
utrunc(uint64_t x, unsigned num_bits)
|
|
{
|
|
if (num_bits == 64)
|
|
return x;
|
|
|
|
return (x << (64 - num_bits)) >> (64 - num_bits);
|
|
}
|
|
|
|
static inline int64_t
|
|
strunc(int64_t x, unsigned num_bits)
|
|
{
|
|
if (num_bits == 64)
|
|
return x;
|
|
|
|
return (int64_t)((uint64_t)x << (64 - num_bits)) >> (64 - num_bits);
|
|
}
|
|
|
|
static inline bool
|
|
uint_is_in_range(uint64_t x, unsigned num_bits)
|
|
{
|
|
if (num_bits == 64)
|
|
return true;
|
|
|
|
return x < (1ull << num_bits);
|
|
}
|
|
|
|
static inline bool
|
|
sint_is_in_range(int64_t x, unsigned num_bits)
|
|
{
|
|
if (num_bits == 64)
|
|
return true;
|
|
|
|
return x >= -(1ll << (num_bits - 1)) &&
|
|
x < (1ll << (num_bits - 1));
|
|
}
|
|
|
|
static inline uint64_t
|
|
uadd_sat(uint64_t a, uint64_t b, unsigned num_bits)
|
|
{
|
|
assert(uint_is_in_range(a, num_bits));
|
|
assert(uint_is_in_range(b, num_bits));
|
|
|
|
uint64_t sum = a + b;
|
|
if (num_bits == 64) {
|
|
/* Standard overflow check */
|
|
return sum < a ? UINT64_MAX : sum;
|
|
} else {
|
|
/* Check if sum is more than num_bits */
|
|
return (sum >> num_bits) ? u_uintN_max(num_bits) : sum;
|
|
}
|
|
}
|
|
|
|
static inline uint64_t
|
|
umul_add_high(uint64_t a, uint64_t b, uint64_t c, unsigned num_bits)
|
|
{
|
|
assert(uint_is_in_range(a, num_bits));
|
|
assert(uint_is_in_range(b, num_bits));
|
|
assert(uint_is_in_range(c, num_bits));
|
|
|
|
if (num_bits == 64) {
|
|
uint32_t a32[2] = { (uint32_t)a, (uint32_t)(a >> 32) };
|
|
uint32_t b32[2] = { (uint32_t)b, (uint32_t)(b >> 32) };
|
|
uint32_t c32[2] = { (uint32_t)c, (uint32_t)(c >> 32) };
|
|
|
|
uint32_t ab32[4];
|
|
ubm_mul_u32arr(ab32, a32, b32);
|
|
|
|
uint32_t abc32[4];
|
|
ubm_add_u32arr(abc32, ab32, c32);
|
|
|
|
return abc32[2] | ((uint64_t)abc32[3] << 32);
|
|
} else {
|
|
assert(num_bits <= 32);
|
|
return utrunc(((a * b) + c) >> num_bits, num_bits);
|
|
}
|
|
}
|
|
|
|
static inline int64_t
|
|
smul_high(int64_t a, int64_t b, unsigned num_bits)
|
|
{
|
|
assert(sint_is_in_range(a, num_bits));
|
|
assert(sint_is_in_range(b, num_bits));
|
|
|
|
if (num_bits == 64) {
|
|
uint32_t a32[4] = {
|
|
(uint32_t)a,
|
|
(uint32_t)(a >> 32),
|
|
(uint32_t)(a >> 63), /* sign extend */
|
|
(uint32_t)(a >> 63), /* sign extend */
|
|
};
|
|
uint32_t b32[4] = {
|
|
(uint32_t)b,
|
|
(uint32_t)(b >> 32),
|
|
(uint32_t)(b >> 63), /* sign extend */
|
|
(uint32_t)(b >> 63), /* sign extend */
|
|
};
|
|
|
|
uint32_t ab32[4];
|
|
ubm_mul_u32arr(ab32, a32, b32);
|
|
|
|
return ab32[2] | ((uint64_t)ab32[3] << 32);
|
|
} else {
|
|
assert(num_bits <= 32);
|
|
return strunc((a * b) >> num_bits, num_bits);
|
|
}
|
|
}
|
|
|
|
static inline uint64_t
|
|
fast_udiv_add_sat(uint64_t n, struct util_fast_udiv_info m, unsigned num_bits)
|
|
{
|
|
assert(uint_is_in_range(n, num_bits));
|
|
assert(uint_is_in_range(m.multiplier, num_bits));
|
|
|
|
n = n >> m.pre_shift;
|
|
n = uadd_sat(n, m.increment, num_bits);
|
|
n = umul_add_high(n, m.multiplier, 0, num_bits);
|
|
n = n >> m.post_shift;
|
|
|
|
return n;
|
|
}
|
|
|
|
static inline uint64_t
|
|
fast_udiv_mul_add(uint64_t n, struct util_fast_udiv_info m, unsigned num_bits)
|
|
{
|
|
assert(uint_is_in_range(n, num_bits));
|
|
assert(uint_is_in_range(m.multiplier, num_bits));
|
|
|
|
n = n >> m.pre_shift;
|
|
n = umul_add_high(n, m.multiplier,
|
|
m.increment ? m.multiplier : 0,
|
|
num_bits);
|
|
n = n >> m.post_shift;
|
|
|
|
return n;
|
|
}
|
|
|
|
static inline uint64_t
|
|
fast_sdiv(int64_t n, int64_t d, struct util_fast_sdiv_info m, unsigned num_bits)
|
|
{
|
|
assert(sint_is_in_range(n, num_bits));
|
|
assert(sint_is_in_range(d, num_bits));
|
|
assert(sint_is_in_range(m.multiplier, num_bits));
|
|
|
|
int64_t res;
|
|
res = smul_high(n, m.multiplier, num_bits);
|
|
if (d > 0 && m.multiplier < 0)
|
|
res = strunc(res + n, num_bits);
|
|
if (d < 0 && m.multiplier > 0)
|
|
res = strunc(res - n, num_bits);
|
|
res = res >> m.shift;
|
|
res = res - (res >> (num_bits - 1));
|
|
|
|
return res;
|
|
}
|
|
|
|
static uint64_t
|
|
rand_uint(unsigned bits, unsigned min)
|
|
{
|
|
assert(bits >= 4);
|
|
|
|
/* Make sure we get some small and large numbers and powers of two every
|
|
* once in a while
|
|
*/
|
|
int k = rand() % 64;
|
|
if (k == 17) {
|
|
return min + (rand() % 16);
|
|
} else if (k == 42) {
|
|
return u_uintN_max(bits) - (rand() % 16);
|
|
} else if (k == 9) {
|
|
uint64_t r;
|
|
do {
|
|
r = 1ull << (rand() % bits);
|
|
} while (r < min);
|
|
return r;
|
|
}
|
|
|
|
if (min == 0) {
|
|
assert(bits <= 64);
|
|
uint64_t r = 0;
|
|
for (unsigned i = 0; i < 8; i++)
|
|
r |= ((uint64_t)rand() & 0xf) << i * 8;
|
|
return r >> (63 - (rand() % bits));
|
|
} else {
|
|
uint64_t r;
|
|
do {
|
|
r = rand_uint(bits, 0);
|
|
} while (r < min);
|
|
return r;
|
|
}
|
|
}
|
|
|
|
static int64_t
|
|
rand_sint(unsigned bits, unsigned min_abs)
|
|
{
|
|
/* Make sure we hit MIN_INT every once in a while */
|
|
if (rand() % 64 == 37)
|
|
return u_intN_min(bits);
|
|
|
|
int64_t s = rand_uint(bits - 1, min_abs);
|
|
return rand() & 1 ? s : -s;
|
|
}
|
|
|
|
static uint64_t
|
|
udiv(uint64_t a, uint64_t b, unsigned bit_size)
|
|
{
|
|
switch (bit_size) {
|
|
case 64: return (uint64_t)a / (uint64_t)b;
|
|
case 32: return (uint32_t)a / (uint32_t)b;
|
|
case 16: return (uint16_t)a / (uint16_t)b;
|
|
case 8: return (uint8_t)a / (uint8_t)b;
|
|
default:
|
|
assert(!"Invalid bit size");
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
static int64_t
|
|
sdiv(int64_t a, int64_t b, unsigned bit_size)
|
|
{
|
|
switch (bit_size) {
|
|
case 64: return (int64_t)a / (int64_t)b;
|
|
case 32: return (int32_t)a / (int32_t)b;
|
|
case 16: return (int16_t)a / (int16_t)b;
|
|
case 8: return (int8_t)a / (int8_t)b;
|
|
default:
|
|
assert(!"Invalid bit size");
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
static void
|
|
random_udiv_add_sat_test(unsigned bits, bool bounded)
|
|
{
|
|
for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
|
|
uint64_t n = rand_uint(bits, 0);
|
|
uint64_t d = rand_uint(bits, 2);
|
|
assert(uint_is_in_range(n, bits));
|
|
assert(uint_is_in_range(d, bits) && d >= 2);
|
|
|
|
unsigned n_bits = bounded ? util_logbase2_64(MAX2(n, 1)) + 1 : bits;
|
|
|
|
struct util_fast_udiv_info m =
|
|
util_compute_fast_udiv_info(d, n_bits, bits);
|
|
EXPECT_EQ(fast_udiv_add_sat(n, m, bits), udiv(n, d, bits));
|
|
}
|
|
}
|
|
|
|
static void
|
|
random_udiv_mul_add_test(unsigned bits, bool bounded)
|
|
{
|
|
for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
|
|
uint64_t n = rand_uint(bits, 0);
|
|
uint64_t d = rand_uint(bits, 1);
|
|
assert(uint_is_in_range(n, bits));
|
|
assert(uint_is_in_range(d, bits) && d >= 1);
|
|
|
|
unsigned n_bits = bounded ? util_logbase2_64(MAX2(n, 1)) + 1: bits;
|
|
|
|
struct util_fast_udiv_info m =
|
|
util_compute_fast_udiv_info(d, n_bits, bits);
|
|
EXPECT_EQ(fast_udiv_mul_add(n, m, bits), udiv(n, d, bits));
|
|
}
|
|
}
|
|
|
|
static void
|
|
random_sdiv_test(unsigned bits)
|
|
{
|
|
for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
|
|
int64_t n = rand_sint(bits, 0);
|
|
int64_t d;
|
|
do {
|
|
d = rand_sint(bits, 2);
|
|
} while (d == INT64_MIN || util_is_power_of_two_or_zero64(llabs(d)));
|
|
|
|
assert(sint_is_in_range(n, bits));
|
|
assert(sint_is_in_range(d, bits) && llabs(d) >= 2);
|
|
|
|
struct util_fast_sdiv_info m =
|
|
util_compute_fast_sdiv_info(d, bits);
|
|
EXPECT_EQ(fast_sdiv(n, d, m, bits), sdiv(n, d, bits));
|
|
}
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint8_add_sat)
|
|
{
|
|
/* 8-bit is small enough we can brute-force the entire space */
|
|
for (unsigned d = 2; d < 256; d++) {
|
|
for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
|
|
struct util_fast_udiv_info m =
|
|
util_compute_fast_udiv_info(d, n_bits, 8);
|
|
|
|
for (unsigned n = 0; n < (1u << n_bits); n++)
|
|
EXPECT_EQ(fast_udiv_add_sat(n, m, 8), udiv(n, d, 8));
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint8_mul_add)
|
|
{
|
|
/* 8-bit is small enough we can brute-force the entire space */
|
|
for (unsigned d = 2; d < 256; d++) {
|
|
for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
|
|
struct util_fast_udiv_info m =
|
|
util_compute_fast_udiv_info(d, n_bits, 8);
|
|
|
|
for (unsigned n = 0; n < (1u << n_bits); n++)
|
|
EXPECT_EQ(fast_udiv_mul_add(n, m, 8), udiv(n, d, 8));
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, int8)
|
|
{
|
|
/* 8-bit is small enough we can brute-force the entire space */
|
|
for (int n = -128; n < 128; n++) {
|
|
for (int d = -128; d < 128; d++) {
|
|
if (util_is_power_of_two_or_zero(abs(d)))
|
|
continue;
|
|
|
|
struct util_fast_sdiv_info m =
|
|
util_compute_fast_sdiv_info(d, 8);
|
|
EXPECT_EQ(fast_sdiv(n, d, m, 8), sdiv(n, d, 8));
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint16_add_sat_bounded)
|
|
{
|
|
random_udiv_add_sat_test(16, true);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint16_add_sat_full)
|
|
{
|
|
random_udiv_add_sat_test(16, false);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint16_mul_add_bounded)
|
|
{
|
|
random_udiv_mul_add_test(16, true);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint16_mul_add_full)
|
|
{
|
|
random_udiv_mul_add_test(16, false);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, int16)
|
|
{
|
|
random_sdiv_test(16);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint32_add_sat_bounded)
|
|
{
|
|
random_udiv_add_sat_test(32, true);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint32_add_sat_full)
|
|
{
|
|
random_udiv_add_sat_test(32, false);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint32_mul_add_bounded)
|
|
{
|
|
random_udiv_mul_add_test(32, true);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint32_mul_add_full)
|
|
{
|
|
random_udiv_mul_add_test(32, false);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, int32)
|
|
{
|
|
random_sdiv_test(32);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, util_fast_udiv32)
|
|
{
|
|
for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
|
|
uint32_t n = rand_uint(32, 0);
|
|
uint32_t d = rand_uint(32, 1);
|
|
|
|
struct util_fast_udiv_info m =
|
|
util_compute_fast_udiv_info(d, 32, 32);
|
|
EXPECT_EQ(util_fast_udiv32(n, m), n / d);
|
|
}
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, util_fast_udiv32_nuw)
|
|
{
|
|
for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
|
|
uint32_t n = rand_uint(32, 0);
|
|
if (n == UINT32_MAX)
|
|
continue;
|
|
uint32_t d = rand_uint(32, 1);
|
|
|
|
struct util_fast_udiv_info m =
|
|
util_compute_fast_udiv_info(d, 32, 32);
|
|
EXPECT_EQ(util_fast_udiv32_nuw(n, m), n / d);
|
|
}
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, util_fast_udiv32_u31_d_not_one)
|
|
{
|
|
for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
|
|
uint32_t n = rand_uint(31, 0);
|
|
uint32_t d = rand_uint(31, 2);
|
|
|
|
struct util_fast_udiv_info m =
|
|
util_compute_fast_udiv_info(d, 31, 32);
|
|
EXPECT_EQ(util_fast_udiv32_u31_d_not_one(n, m), n / d);
|
|
}
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint64_add_sat_bounded)
|
|
{
|
|
random_udiv_add_sat_test(64, true);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint64_add_sat_full)
|
|
{
|
|
random_udiv_add_sat_test(64, false);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint64_mul_add_bounded)
|
|
{
|
|
random_udiv_mul_add_test(64, true);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, uint64_mul_add_full)
|
|
{
|
|
random_udiv_mul_add_test(64, false);
|
|
}
|
|
|
|
TEST(fast_idiv_by_const, int64)
|
|
{
|
|
random_sdiv_test(64);
|
|
}
|