/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

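/* Tests for the fast integer division-by-constant utilities in
 * util/fast_idiv_by_const.h.  Each test checks that the multiplier/shift
 * sequence computed by util_compute_fast_udiv_info() or
 * util_compute_fast_sdiv_info() matches ordinary division, exhaustively
 * for 8-bit types and on random inputs for wider ones.
 */
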
#undef NDEBUG

#include <gtest/gtest.h>
#include "util/bigmath.h"
#include "util/fast_idiv_by_const.h"
#include "util/u_math.h"

#define RAND_TEST_ITERATIONS 100000

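/* Truncate x to its low num_bits bits, zero-filling the rest. */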
static inline uint64_t
utrunc(uint64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return x;

   return (x << (64 - num_bits)) >> (64 - num_bits);
}

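/* Truncate x to its low num_bits bits, then sign-extend back to 64 bits. */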
static inline int64_t
strunc(int64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return x;

   return (int64_t)((uint64_t)x << (64 - num_bits)) >> (64 - num_bits);
}

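/* Return true if x is representable in num_bits as an unsigned integer. */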
static inline bool
uint_is_in_range(uint64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return true;

   return x < (1ull << num_bits);
}

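/* Return true if x is representable in num_bits as a signed
 * two's-complement integer.
 */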
static inline bool
sint_is_in_range(int64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return true;

   return x >= -(1ll << (num_bits - 1)) &&
          x < (1ll << (num_bits - 1));
}

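/* Add two unsigned num_bits values, clamping the result to the num_bits
 * maximum on overflow.
 */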
static inline uint64_t
uadd_sat(uint64_t a, uint64_t b, unsigned num_bits)
{
   assert(uint_is_in_range(a, num_bits));
   assert(uint_is_in_range(b, num_bits));

   uint64_t sum = a + b;
   if (num_bits == 64) {
      /* Standard overflow check */
      return sum < a ? UINT64_MAX : sum;
   } else {
      /* Any set bits above num_bits mean the sum overflowed */
      return (sum >> num_bits) ? u_uintN_max(num_bits) : sum;
   }
}

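/* Return the high num_bits bits of a * b + c.  The 64-bit case needs a
 * 128-bit intermediate, so it is computed with the bigmath 32-bit-limb
 * helpers.
 */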
static inline uint64_t
umul_add_high(uint64_t a, uint64_t b, uint64_t c, unsigned num_bits)
{
   assert(uint_is_in_range(a, num_bits));
   assert(uint_is_in_range(b, num_bits));
   assert(uint_is_in_range(c, num_bits));

   if (num_bits == 64) {
      uint32_t a32[2] = { (uint32_t)a, (uint32_t)(a >> 32) };
      uint32_t b32[2] = { (uint32_t)b, (uint32_t)(b >> 32) };
      uint32_t c32[2] = { (uint32_t)c, (uint32_t)(c >> 32) };

      uint32_t ab32[4];
      ubm_mul_u32arr(ab32, a32, b32);

      uint32_t abc32[4];
      ubm_add_u32arr(abc32, ab32, c32);

      return abc32[2] | ((uint64_t)abc32[3] << 32);
   } else {
      assert(num_bits <= 32);
      return utrunc(((a * b) + c) >> num_bits, num_bits);
   }
}

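/* Return the high num_bits bits of the signed product a * b.  The 64-bit
 * case sign-extends both operands to four 32-bit limbs before multiplying.
 */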
static inline int64_t
smul_high(int64_t a, int64_t b, unsigned num_bits)
{
   assert(sint_is_in_range(a, num_bits));
   assert(sint_is_in_range(b, num_bits));

   if (num_bits == 64) {
      uint32_t a32[4] = {
         (uint32_t)a,
         (uint32_t)(a >> 32),
         (uint32_t)(a >> 63), /* sign extend */
         (uint32_t)(a >> 63), /* sign extend */
      };
      uint32_t b32[4] = {
         (uint32_t)b,
         (uint32_t)(b >> 32),
         (uint32_t)(b >> 63), /* sign extend */
         (uint32_t)(b >> 63), /* sign extend */
      };

      uint32_t ab32[4];
      ubm_mul_u32arr(ab32, a32, b32);

      return ab32[2] | ((uint64_t)ab32[3] << 32);
   } else {
      assert(num_bits <= 32);
      return strunc((a * b) >> num_bits, num_bits);
   }
}

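/* Emulate the saturating-add variant of the fast unsigned division
 * sequence at an arbitrary bit size: pre-shift, saturating increment, high
 * multiply, post-shift.
 */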
static inline uint64_t
fast_udiv_add_sat(uint64_t n, struct util_fast_udiv_info m, unsigned num_bits)
{
   assert(uint_is_in_range(n, num_bits));
   assert(uint_is_in_range(m.multiplier, num_bits));

   n = n >> m.pre_shift;
   n = uadd_sat(n, m.increment, num_bits);
   n = umul_add_high(n, m.multiplier, 0, num_bits);
   n = n >> m.post_shift;

   return n;
}

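/* Emulate the multiply-add variant of the fast unsigned division sequence:
 * instead of incrementing n first, the increment is folded into the
 * multiply by adding the multiplier to the product.
 */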
static inline uint64_t
fast_udiv_mul_add(uint64_t n, struct util_fast_udiv_info m, unsigned num_bits)
{
   assert(uint_is_in_range(n, num_bits));
   assert(uint_is_in_range(m.multiplier, num_bits));

   n = n >> m.pre_shift;
   n = umul_add_high(n, m.multiplier,
                     m.increment ? m.multiplier : 0,
                     num_bits);
   n = n >> m.post_shift;

   return n;
}

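/* Emulate the fast signed division sequence: take the high num_bits of the
 * product, add or subtract n when the multiplier's sign disagrees with the
 * divisor's, shift arithmetically, then add one to negative results to
 * round toward zero.
 */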
static inline int64_t
fast_sdiv(int64_t n, int64_t d, struct util_fast_sdiv_info m, unsigned num_bits)
{
   assert(sint_is_in_range(n, num_bits));
   assert(sint_is_in_range(d, num_bits));
   assert(sint_is_in_range(m.multiplier, num_bits));

   int64_t res;
   res = smul_high(n, m.multiplier, num_bits);
   if (d > 0 && m.multiplier < 0)
      res = strunc(res + n, num_bits);
   if (d < 0 && m.multiplier > 0)
      res = strunc(res - n, num_bits);
   res = res >> m.shift;
   res = res - (res >> (num_bits - 1));

   return res;
}

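/* Return a random unsigned value of at most `bits` bits that is >= min,
 * biased so that small values, near-maximum values, and powers of two all
 * show up regularly.
 */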
static uint64_t
rand_uint(unsigned bits, unsigned min)
{
   assert(bits >= 4);

   /* Make sure we get some small and large numbers and powers of two every
    * once in a while
    */
   int k = rand() % 64;
   if (k == 17) {
      return min + (rand() % 16);
   } else if (k == 42) {
      return u_uintN_max(bits) - (rand() % 16);
   } else if (k == 9) {
      uint64_t r;
      do {
         r = 1ull << (rand() % bits);
      } while (r < min);
      return r;
   }

   if (min == 0) {
      assert(bits <= 64);
      /* Fill all 64 bits one byte at a time, then shift down to a random
       * width so all magnitudes get exercised.
       */
      uint64_t r = 0;
      for (unsigned i = 0; i < 8; i++)
         r |= ((uint64_t)rand() & 0xff) << i * 8;
      return r >> (63 - (rand() % bits));
   } else {
      uint64_t r;
      do {
         r = rand_uint(bits, 0);
      } while (r < min);
      return r;
   }
}

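/* Return a random signed value of at most `bits` bits whose magnitude is
 * at least min_abs; the most negative value is generated every once in a
 * while since it has no positive counterpart.
 */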
static int64_t
rand_sint(unsigned bits, unsigned min_abs)
{
   /* Make sure we hit MIN_INT every once in a while */
   if (rand() % 64 == 37)
      return u_intN_min(bits);

   int64_t s = rand_uint(bits - 1, min_abs);
   return rand() & 1 ? s : -s;
}

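/* Reference unsigned division at the given bit size. */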
static uint64_t
udiv(uint64_t a, uint64_t b, unsigned bit_size)
{
   switch (bit_size) {
   case 64: return (uint64_t)a / (uint64_t)b;
   case 32: return (uint32_t)a / (uint32_t)b;
   case 16: return (uint16_t)a / (uint16_t)b;
   case 8:  return  (uint8_t)a / (uint8_t)b;
   default:
      assert(!"Invalid bit size");
      return 0;
   }
}

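/* Reference signed division at the given bit size. */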
static int64_t
sdiv(int64_t a, int64_t b, unsigned bit_size)
{
   switch (bit_size) {
   case 64: return (int64_t)a / (int64_t)b;
   case 32: return (int32_t)a / (int32_t)b;
   case 16: return (int16_t)a / (int16_t)b;
   case 8:  return  (int8_t)a / (int8_t)b;
   default:
      assert(!"Invalid bit size");
      return 0;
   }
}

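/* Compare fast_udiv_add_sat() against real division for random numerators
 * and random divisors >= 2.  When bounded, util_compute_fast_udiv_info()
 * is told how many bits the numerator actually occupies instead of the
 * full bit size.
 */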
static void
random_udiv_add_sat_test(unsigned bits, bool bounded)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint64_t n = rand_uint(bits, 0);
      uint64_t d = rand_uint(bits, 2);
      assert(uint_is_in_range(n, bits));
      assert(uint_is_in_range(d, bits) && d >= 2);

      unsigned n_bits = bounded ? util_logbase2_64(MAX2(n, 1)) + 1 : bits;

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, n_bits, bits);
      EXPECT_EQ(fast_udiv_add_sat(n, m, bits), udiv(n, d, bits));
   }
}

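/* Same as above for the multiply-add variant; divisors of 1 are allowed
 * here.
 */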
static void
random_udiv_mul_add_test(unsigned bits, bool bounded)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint64_t n = rand_uint(bits, 0);
      uint64_t d = rand_uint(bits, 1);
      assert(uint_is_in_range(n, bits));
      assert(uint_is_in_range(d, bits) && d >= 1);

      unsigned n_bits = bounded ? util_logbase2_64(MAX2(n, 1)) + 1 : bits;

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, n_bits, bits);
      EXPECT_EQ(fast_udiv_mul_add(n, m, bits), udiv(n, d, bits));
   }
}

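/* Compare fast_sdiv() against real division for random operands; divisors
 * that are powers of two (or INT64_MIN) are excluded.
 */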
static void
random_sdiv_test(unsigned bits)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      int64_t n = rand_sint(bits, 0);
      int64_t d;
      do {
         d = rand_sint(bits, 2);
      } while (d == INT64_MIN || util_is_power_of_two_or_zero64(llabs(d)));

      assert(sint_is_in_range(n, bits));
      assert(sint_is_in_range(d, bits) && llabs(d) >= 2);

      struct util_fast_sdiv_info m =
         util_compute_fast_sdiv_info(d, bits);
      EXPECT_EQ(fast_sdiv(n, d, m, bits), sdiv(n, d, bits));
   }
}

TEST(fast_idiv_by_const, uint8_add_sat)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (unsigned d = 2; d < 256; d++) {
      for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
         struct util_fast_udiv_info m =
            util_compute_fast_udiv_info(d, n_bits, 8);

         for (unsigned n = 0; n < (1u << n_bits); n++)
            EXPECT_EQ(fast_udiv_add_sat(n, m, 8), udiv(n, d, 8));
      }
   }
}

TEST(fast_idiv_by_const, uint8_mul_add)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (unsigned d = 2; d < 256; d++) {
      for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
         struct util_fast_udiv_info m =
            util_compute_fast_udiv_info(d, n_bits, 8);

         for (unsigned n = 0; n < (1u << n_bits); n++)
            EXPECT_EQ(fast_udiv_mul_add(n, m, 8), udiv(n, d, 8));
      }
   }
}

TEST(fast_idiv_by_const, int8)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (int n = -128; n < 128; n++) {
      for (int d = -128; d < 128; d++) {
         if (util_is_power_of_two_or_zero(abs(d)))
            continue;

         struct util_fast_sdiv_info m =
            util_compute_fast_sdiv_info(d, 8);
         EXPECT_EQ(fast_sdiv(n, d, m, 8), sdiv(n, d, 8));
      }
   }
}

TEST(fast_idiv_by_const, uint16_add_sat_bounded)
{
   random_udiv_add_sat_test(16, true);
}

TEST(fast_idiv_by_const, uint16_add_sat_full)
{
   random_udiv_add_sat_test(16, false);
}

TEST(fast_idiv_by_const, uint16_mul_add_bounded)
{
   random_udiv_mul_add_test(16, true);
}

TEST(fast_idiv_by_const, uint16_mul_add_full)
{
   random_udiv_mul_add_test(16, false);
}

TEST(fast_idiv_by_const, int16)
{
   random_sdiv_test(16);
}

TEST(fast_idiv_by_const, uint32_add_sat_bounded)
{
   random_udiv_add_sat_test(32, true);
}

TEST(fast_idiv_by_const, uint32_add_sat_full)
{
   random_udiv_add_sat_test(32, false);
}

TEST(fast_idiv_by_const, uint32_mul_add_bounded)
{
   random_udiv_mul_add_test(32, true);
}

TEST(fast_idiv_by_const, uint32_mul_add_full)
{
   random_udiv_mul_add_test(32, false);
}

TEST(fast_idiv_by_const, int32)
{
   random_sdiv_test(32);
}

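/* The util_fast_udiv32* tests exercise the ready-made 32-bit division
 * helpers from fast_idiv_by_const.h directly.
 */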
TEST(fast_idiv_by_const, util_fast_udiv32)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(32, 0);
      uint32_t d = rand_uint(32, 1);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 32, 32);
      EXPECT_EQ(util_fast_udiv32(n, m), n / d);
   }
}

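/* The nuw ("no unsigned wrap") variant assumes the numerator can be
 * incremented without wrapping, so n == UINT32_MAX is skipped.
 */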
TEST(fast_idiv_by_const, util_fast_udiv32_nuw)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(32, 0);
      if (n == UINT32_MAX)
         continue;
      uint32_t d = rand_uint(32, 1);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 32, 32);
      EXPECT_EQ(util_fast_udiv32_nuw(n, m), n / d);
   }
}

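/* This variant requires a numerator of at most 31 bits and a divisor that
 * is neither 0 nor 1.
 */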
TEST(fast_idiv_by_const, util_fast_udiv32_u31_d_not_one)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(31, 0);
      uint32_t d = rand_uint(31, 2);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 31, 32);
      EXPECT_EQ(util_fast_udiv32_u31_d_not_one(n, m), n / d);
   }
}

TEST(fast_idiv_by_const, uint64_add_sat_bounded)
{
   random_udiv_add_sat_test(64, true);
}

TEST(fast_idiv_by_const, uint64_add_sat_full)
{
   random_udiv_add_sat_test(64, false);
}

TEST(fast_idiv_by_const, uint64_mul_add_bounded)
{
   random_udiv_mul_add_test(64, true);
}

TEST(fast_idiv_by_const, uint64_mul_add_full)
{
   random_udiv_mul_add_test(64, false);
}

TEST(fast_idiv_by_const, int64)
{
   random_sdiv_test(64);
}