/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

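/*
 * Unit tests for util/fast_idiv_by_const: integer division by a known
 * divisor rewritten as a multiply-high plus a few shifts and adds (see
 * Granlund and Montgomery's "Division by Invariant Integers using
 * Multiplication").  The helpers below emulate those multiply/shift
 * sequences at an arbitrary bit size so that the precomputed division
 * parameters can be checked against plain integer division.
 */
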
#undef NDEBUG

#include <gtest/gtest.h>
#include "util/bigmath.h"
#include "util/fast_idiv_by_const.h"
#include "util/u_math.h"

#define RAND_TEST_ITERATIONS 100000

#define MAX_UINT(bits) \
   (((bits) == 64) ? UINT64_MAX : ((1ull << (bits)) - 1))

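/* Truncate x to its low num_bits bits, zero-filling the upper bits. */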
static inline uint64_t
utrunc(uint64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return x;

   return (x << (64 - num_bits)) >> (64 - num_bits);
}

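/* Truncate x to num_bits bits and sign-extend the result. */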
static inline int64_t
strunc(int64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return x;

   return (int64_t)((uint64_t)x << (64 - num_bits)) >> (64 - num_bits);
}

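/* Returns true if x fits in an unsigned num_bits-bit integer. */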
static inline bool
uint_is_in_range(uint64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return true;

   return x < (1ull << num_bits);
}

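/* Returns true if x fits in a signed two's-complement num_bits-bit integer. */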
static inline bool
sint_is_in_range(int64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return true;

   return x >= -(1ll << (num_bits - 1)) &&
          x < (1ll << (num_bits - 1));
}

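/* Unsigned saturating addition at the given bit size: the sum is clamped
 * to MAX_UINT(num_bits) on overflow.
 */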
static inline uint64_t
uadd_sat(uint64_t a, uint64_t b, unsigned num_bits)
{
   assert(uint_is_in_range(a, num_bits));
   assert(uint_is_in_range(b, num_bits));

   uint64_t sum = a + b;
   if (num_bits == 64) {
      /* Standard overflow check */
      return sum < a ? UINT64_MAX : sum;
   } else {
      /* Check whether the sum overflows num_bits bits */
      return (sum >> num_bits) ? MAX_UINT(num_bits) : sum;
   }
}

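/* Returns the high num_bits bits of a * b + c.  The 64-bit case widens to
 * a 128-bit intermediate using the 32-bit-limb helpers from util/bigmath.h.
 */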
static inline uint64_t
umul_add_high(uint64_t a, uint64_t b, uint64_t c, unsigned num_bits)
{
   assert(uint_is_in_range(a, num_bits));
   assert(uint_is_in_range(b, num_bits));
   assert(uint_is_in_range(c, num_bits));

   if (num_bits == 64) {
      uint32_t a32[2] = { (uint32_t)a, (uint32_t)(a >> 32) };
      uint32_t b32[2] = { (uint32_t)b, (uint32_t)(b >> 32) };
      uint32_t c32[2] = { (uint32_t)c, (uint32_t)(c >> 32) };

      uint32_t ab32[4];
      ubm_mul_u32arr(ab32, a32, b32);

      uint32_t abc32[4];
      ubm_add_u32arr(abc32, ab32, c32);

      return abc32[2] | ((uint64_t)abc32[3] << 32);
   } else {
      assert(num_bits <= 32);
      return utrunc(((a * b) + c) >> num_bits, num_bits);
   }
}

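/* Returns the high num_bits bits of the signed product a * b. */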
static inline int64_t
smul_high(int64_t a, int64_t b, unsigned num_bits)
{
   assert(sint_is_in_range(a, num_bits));
   assert(sint_is_in_range(b, num_bits));

   if (num_bits == 64) {
      uint32_t a32[4] = {
         (uint32_t)a,
         (uint32_t)(a >> 32),
         (uint32_t)(a >> 63), /* sign extend */
         (uint32_t)(a >> 63), /* sign extend */
      };
      uint32_t b32[4] = {
         (uint32_t)b,
         (uint32_t)(b >> 32),
         (uint32_t)(b >> 63), /* sign extend */
         (uint32_t)(b >> 63), /* sign extend */
      };

      uint32_t ab32[4];
      ubm_mul_u32arr(ab32, a32, b32);

      return ab32[2] | ((uint64_t)ab32[3] << 32);
   } else {
      assert(num_bits <= 32);
      return strunc((a * b) >> num_bits, num_bits);
   }
}

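/* Emulates the saturating-add variant of the fast unsigned division
 * sequence: add the increment to (n >> pre_shift) with saturation, take
 * the high num_bits bits of the product with the multiplier, then shift
 * right by post_shift.
 */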
static inline uint64_t
fast_udiv_add_sat(uint64_t n, struct util_fast_udiv_info m, unsigned num_bits)
{
   assert(uint_is_in_range(n, num_bits));
   assert(uint_is_in_range(m.multiplier, num_bits));

   n = n >> m.pre_shift;
   n = uadd_sat(n, m.increment, num_bits);
   n = umul_add_high(n, m.multiplier, 0, num_bits);
   n = n >> m.post_shift;

   return n;
}

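/* Emulates the multiply-add variant, which folds the increment into the
 * multiply: when m.increment is set, this computes the high bits of
 * n * multiplier + multiplier, i.e. of (n + 1) * multiplier, so no
 * saturating add is needed.
 */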
static inline uint64_t
fast_udiv_mul_add(uint64_t n, struct util_fast_udiv_info m, unsigned num_bits)
{
   assert(uint_is_in_range(n, num_bits));
   assert(uint_is_in_range(m.multiplier, num_bits));

   n = n >> m.pre_shift;
   n = umul_add_high(n, m.multiplier,
                     m.increment ? m.multiplier : 0,
                     num_bits);
   n = n >> m.post_shift;

   return n;
}

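/* Emulates the fast signed division sequence: a signed multiply-high, an
 * add or subtract of n depending on the signs of the divisor and the
 * multiplier, an arithmetic shift, and a final fixup that adds one to
 * negative intermediate results by subtracting their broadcast sign bit.
 */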
static inline uint64_t
fast_sdiv(int64_t n, int64_t d, struct util_fast_sdiv_info m, unsigned num_bits)
{
   assert(sint_is_in_range(n, num_bits));
   assert(sint_is_in_range(d, num_bits));
   assert(sint_is_in_range(m.multiplier, num_bits));

   int64_t res;
   res = smul_high(n, m.multiplier, num_bits);
   if (d > 0 && m.multiplier < 0)
      res = strunc(res + n, num_bits);
   if (d < 0 && m.multiplier > 0)
      res = strunc(res - n, num_bits);
   res = res >> m.shift;
   res = res - (res >> (num_bits - 1));

   return res;
}

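/* Random unsigned value in [min, MAX_UINT(bits)], biased so that small
 * values, near-maximum values, and powers of two show up regularly.
 */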
static uint64_t
rand_uint(unsigned bits, unsigned min)
{
   assert(bits >= 4);

   /* Make sure we get some small and large numbers and powers of two every
    * once in a while
    */
   int k = rand() % 64;
   if (k == 17) {
      return min + (rand() % 16);
   } else if (k == 42) {
      return MAX_UINT(bits) - (rand() % 16);
   } else if (k == 9) {
      uint64_t r;
      do {
         r = 1ull << (rand() % bits);
      } while (r < min);
      return r;
   }

   if (min == 0) {
      assert(bits <= 64);
      /* Build a random 64-bit value one byte at a time, then shift it
       * right by a random amount so all magnitudes up to 'bits' get
       * covered.
       */
      uint64_t r = 0;
      for (unsigned i = 0; i < 8; i++)
         r |= ((uint64_t)rand() & 0xff) << i * 8;
      return r >> (63 - (rand() % bits));
   } else {
      uint64_t r;
      do {
         r = rand_uint(bits, 0);
      } while (r < min);
      return r;
   }
}

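/* Random signed value with magnitude at least min_abs; occasionally
 * returns the most negative value for the given bit size.
 */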
static int64_t
rand_sint(unsigned bits, unsigned min_abs)
{
   /* Make sure we hit MIN_INT every once in a while */
   if (rand() % 64 == 37)
      return INT64_MIN >> (64 - bits);

   int64_t s = rand_uint(bits - 1, min_abs);
   return rand() & 1 ? s : -s;
}

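/* Reference implementations: plain C division evaluated at the exact bit
 * size under test.
 */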
static uint64_t
udiv(uint64_t a, uint64_t b, unsigned bit_size)
{
   switch (bit_size) {
   case 64: return (uint64_t)a / (uint64_t)b;
   case 32: return (uint32_t)a / (uint32_t)b;
   case 16: return (uint16_t)a / (uint16_t)b;
   case 8:  return  (uint8_t)a / (uint8_t)b;
   default:
      assert(!"Invalid bit size");
      return 0;
   }
}

static int64_t
sdiv(int64_t a, int64_t b, unsigned bit_size)
{
   switch (bit_size) {
   case 64: return (int64_t)a / (int64_t)b;
   case 32: return (int32_t)a / (int32_t)b;
   case 16: return (int16_t)a / (int16_t)b;
   case 8:  return  (int8_t)a / (int8_t)b;
   default:
      assert(!"Invalid bit size");
      return 0;
   }
}

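/* Checks random (n, d) pairs against reference division.  When 'bounded'
 * is set, the division parameters are computed for the actual number of
 * bits in n rather than the full bit size, exercising the cheaper
 * sequences that become possible when the numerator is known to be
 * bounded.
 */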
static void
random_udiv_add_sat_test(unsigned bits, bool bounded)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint64_t n = rand_uint(bits, 0);
      uint64_t d = rand_uint(bits, 2);
      assert(uint_is_in_range(n, bits));
      assert(uint_is_in_range(d, bits) && d >= 2);

      unsigned n_bits = bounded ? util_logbase2_64(MAX2(n, 1)) + 1 : bits;

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, n_bits, bits);
      EXPECT_EQ(fast_udiv_add_sat(n, m, bits), udiv(n, d, bits));
   }
}

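/* Same as above for the multiply-add variant, which also accepts d == 1. */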
static void
random_udiv_mul_add_test(unsigned bits, bool bounded)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint64_t n = rand_uint(bits, 0);
      uint64_t d = rand_uint(bits, 1);
      assert(uint_is_in_range(n, bits));
      assert(uint_is_in_range(d, bits) && d >= 1);

      unsigned n_bits = bounded ? util_logbase2_64(MAX2(n, 1)) + 1 : bits;

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, n_bits, bits);
      EXPECT_EQ(fast_udiv_mul_add(n, m, bits), udiv(n, d, bits));
   }
}

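/* Random signed division test; the divisor loop deliberately skips
 * INT64_MIN and powers of two.
 */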
static void
random_sdiv_test(unsigned bits)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      int64_t n = rand_sint(bits, 0);
      int64_t d;
      do {
         d = rand_sint(bits, 2);
      } while (d == INT64_MIN || util_is_power_of_two_or_zero64(llabs(d)));

      assert(sint_is_in_range(n, bits));
      assert(sint_is_in_range(d, bits) && llabs(d) >= 2);

      struct util_fast_sdiv_info m =
         util_compute_fast_sdiv_info(d, bits);
      EXPECT_EQ(fast_sdiv(n, d, m, bits), sdiv(n, d, bits));
   }
}

TEST(fast_idiv_by_const, uint8_add_sat)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (unsigned d = 2; d < 256; d++) {
      for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
         struct util_fast_udiv_info m =
            util_compute_fast_udiv_info(d, n_bits, 8);

         for (unsigned n = 0; n < (1u << n_bits); n++)
            EXPECT_EQ(fast_udiv_add_sat(n, m, 8), udiv(n, d, 8));
      }
   }
}

TEST(fast_idiv_by_const, uint8_mul_add)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (unsigned d = 2; d < 256; d++) {
      for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
         struct util_fast_udiv_info m =
            util_compute_fast_udiv_info(d, n_bits, 8);

         for (unsigned n = 0; n < (1u << n_bits); n++)
            EXPECT_EQ(fast_udiv_mul_add(n, m, 8), udiv(n, d, 8));
      }
   }
}

TEST(fast_idiv_by_const, int8)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (int n = -128; n < 128; n++) {
      for (int d = -128; d < 128; d++) {
         if (util_is_power_of_two_or_zero(abs(d)))
            continue;

         struct util_fast_sdiv_info m =
            util_compute_fast_sdiv_info(d, 8);
         EXPECT_EQ(fast_sdiv(n, d, m, 8), sdiv(n, d, 8));
      }
   }
}

TEST(fast_idiv_by_const, uint16_add_sat_bounded)
{
   random_udiv_add_sat_test(16, true);
}

TEST(fast_idiv_by_const, uint16_add_sat_full)
{
   random_udiv_add_sat_test(16, false);
}

TEST(fast_idiv_by_const, uint16_mul_add_bounded)
{
   random_udiv_mul_add_test(16, true);
}

TEST(fast_idiv_by_const, uint16_mul_add_full)
{
   random_udiv_mul_add_test(16, false);
}

TEST(fast_idiv_by_const, int16)
{
   random_sdiv_test(16);
}

TEST(fast_idiv_by_const, uint32_add_sat_bounded)
{
   random_udiv_add_sat_test(32, true);
}

TEST(fast_idiv_by_const, uint32_add_sat_full)
{
   random_udiv_add_sat_test(32, false);
}

TEST(fast_idiv_by_const, uint32_mul_add_bounded)
{
   random_udiv_mul_add_test(32, true);
}

TEST(fast_idiv_by_const, uint32_mul_add_full)
{
   random_udiv_mul_add_test(32, false);
}

TEST(fast_idiv_by_const, int32)
{
   random_sdiv_test(32);
}

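/* The following tests exercise the ready-made helpers from
 * fast_idiv_by_const.h directly: the general 32-bit path, the "nuw"
 * variant (for which the test skips n == UINT32_MAX), and the variant for
 * 31-bit numerators with d != 1.
 */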
TEST(fast_idiv_by_const, util_fast_udiv32)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(32, 0);
      uint32_t d = rand_uint(32, 1);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 32, 32);
      EXPECT_EQ(util_fast_udiv32(n, m), n / d);
   }
}

TEST(fast_idiv_by_const, util_fast_udiv32_nuw)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(32, 0);
      if (n == UINT32_MAX)
         continue;
      uint32_t d = rand_uint(32, 1);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 32, 32);
      EXPECT_EQ(util_fast_udiv32_nuw(n, m), n / d);
   }
}

TEST(fast_idiv_by_const, util_fast_udiv32_u31_d_not_one)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(31, 0);
      uint32_t d = rand_uint(31, 2);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 31, 32);
      EXPECT_EQ(util_fast_udiv32_u31_d_not_one(n, m), n / d);
   }
}

TEST(fast_idiv_by_const, uint64_add_sat_bounded)
{
   random_udiv_add_sat_test(64, true);
}

TEST(fast_idiv_by_const, uint64_add_sat_full)
{
   random_udiv_add_sat_test(64, false);
}

TEST(fast_idiv_by_const, uint64_mul_add_bounded)
{
   random_udiv_mul_add_test(64, true);
}

TEST(fast_idiv_by_const, uint64_mul_add_full)
{
   random_udiv_mul_add_test(64, false);
}

TEST(fast_idiv_by_const, int64)
{
   random_sdiv_test(64);
}