/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#undef NDEBUG

#include <gtest/gtest.h>
#include "util/bigmath.h"
#include "util/fast_idiv_by_const.h"
#include "util/u_math.h"

#define RAND_TEST_ITERATIONS 100000

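/* The helpers below model fixed-width integer arithmetic on top of 64-bit C
 * types: utrunc()/strunc() truncate a value to the low num_bits with zero-
 * and sign-extension respectively, and the *_is_in_range() helpers check
 * that a value is representable at that width.
 */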
static inline uint64_t
utrunc(uint64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return x;

   return (x << (64 - num_bits)) >> (64 - num_bits);
}

static inline int64_t
strunc(int64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return x;

   return (int64_t)((uint64_t)x << (64 - num_bits)) >> (64 - num_bits);
}

static inline bool
uint_is_in_range(uint64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return true;

   return x < (1ull << num_bits);
}

static inline bool
sint_is_in_range(int64_t x, unsigned num_bits)
{
   if (num_bits == 64)
      return true;

   return x >= -(1ll << (num_bits - 1)) &&
          x < (1ll << (num_bits - 1));
}

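/* Saturating add: clamps the sum to the maximum value representable in
 * num_bits instead of wrapping.
 */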
static inline uint64_t
uadd_sat(uint64_t a, uint64_t b, unsigned num_bits)
{
   assert(uint_is_in_range(a, num_bits));
   assert(uint_is_in_range(b, num_bits));

   uint64_t sum = a + b;
   if (num_bits == 64) {
      /* Standard overflow check */
      return sum < a ? UINT64_MAX : sum;
   } else {
      /* Check if the sum needs more than num_bits bits */
      return (sum >> num_bits) ? u_uintN_max(num_bits) : sum;
   }
}

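/* Returns the high num_bits of a * b + c.  The 64-bit case has no portable
 * 128-bit intermediate, so the product is formed with the 32-bit big-math
 * array helpers instead.
 */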
static inline uint64_t
umul_add_high(uint64_t a, uint64_t b, uint64_t c, unsigned num_bits)
{
   assert(uint_is_in_range(a, num_bits));
   assert(uint_is_in_range(b, num_bits));
   assert(uint_is_in_range(c, num_bits));

   if (num_bits == 64) {
      uint32_t a32[2] = { (uint32_t)a, (uint32_t)(a >> 32) };
      uint32_t b32[2] = { (uint32_t)b, (uint32_t)(b >> 32) };
      uint32_t c32[2] = { (uint32_t)c, (uint32_t)(c >> 32) };

      uint32_t ab32[4];
      ubm_mul_u32arr(ab32, a32, b32);

      uint32_t abc32[4];
      ubm_add_u32arr(abc32, ab32, c32);

      return abc32[2] | ((uint64_t)abc32[3] << 32);
   } else {
      assert(num_bits <= 32);
      return utrunc(((a * b) + c) >> num_bits, num_bits);
   }
}

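/* Returns the high num_bits of the signed product a * b.  The 64-bit case
 * sign-extends both operands to four 32-bit words before the array multiply.
 */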
static inline int64_t
smul_high(int64_t a, int64_t b, unsigned num_bits)
{
   assert(sint_is_in_range(a, num_bits));
   assert(sint_is_in_range(b, num_bits));

   if (num_bits == 64) {
      uint32_t a32[4] = {
         (uint32_t)a,
         (uint32_t)(a >> 32),
         (uint32_t)(a >> 63), /* sign extend */
         (uint32_t)(a >> 63), /* sign extend */
      };
      uint32_t b32[4] = {
         (uint32_t)b,
         (uint32_t)(b >> 32),
         (uint32_t)(b >> 63), /* sign extend */
         (uint32_t)(b >> 63), /* sign extend */
      };

      uint32_t ab32[4];
      ubm_mul_u32arr(ab32, a32, b32);

      return ab32[2] | ((uint64_t)ab32[3] << 32);
   } else {
      assert(num_bits <= 32);
      return strunc((a * b) >> num_bits, num_bits);
   }
}

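/* Bit-exact model of the "add with saturation" fast-division expansion:
 *
 *    q = umul_hi(sat((n >> pre_shift) + increment), multiplier) >> post_shift
 *
 * i.e. the instruction sequence a driver would emit in place of an integer
 * division by a known constant.
 */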
static inline uint64_t
fast_udiv_add_sat(uint64_t n, struct util_fast_udiv_info m, unsigned num_bits)
{
   assert(uint_is_in_range(n, num_bits));
   assert(uint_is_in_range(m.multiplier, num_bits));

   n = n >> m.pre_shift;
   n = uadd_sat(n, m.increment, num_bits);
   n = umul_add_high(n, m.multiplier, 0, num_bits);
   n = n >> m.post_shift;

   return n;
}

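/* Variant that folds the increment into the multiply: the high half of
 * n * M + M equals the high half of (n + 1) * M, so no saturating add is
 * needed when the increment is 1.
 */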
static inline uint64_t
fast_udiv_mul_add(uint64_t n, struct util_fast_udiv_info m, unsigned num_bits)
{
   assert(uint_is_in_range(n, num_bits));
   assert(uint_is_in_range(m.multiplier, num_bits));

   n = n >> m.pre_shift;
   n = umul_add_high(n, m.multiplier,
                     m.increment ? m.multiplier : 0,
                     num_bits);
   n = n >> m.post_shift;

   return n;
}

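/* Signed fast division in the classic round-toward-zero form (see Hacker's
 * Delight, chapter 10): multiply-high by the magic multiplier, add or
 * subtract n when the multiplier's sign disagrees with the divisor's,
 * arithmetic-shift right, then add one to negative results via
 * res - (res >> (num_bits - 1)).
 */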
static inline int64_t
fast_sdiv(int64_t n, int64_t d, struct util_fast_sdiv_info m, unsigned num_bits)
{
   assert(sint_is_in_range(n, num_bits));
   assert(sint_is_in_range(d, num_bits));
   assert(sint_is_in_range(m.multiplier, num_bits));

   int64_t res;
   res = smul_high(n, m.multiplier, num_bits);
   if (d > 0 && m.multiplier < 0)
      res = strunc(res + n, num_bits);
   if (d < 0 && m.multiplier > 0)
      res = strunc(res - n, num_bits);
   res = res >> m.shift;
   res = res - (res >> (num_bits - 1));

   return res;
}

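/* Returns a pseudo-random value in [min, 2^bits), deliberately biased
 * toward edge cases: small values, values near the type maximum, exact
 * powers of two, and otherwise a variable effective bit width.
 */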
static uint64_t
rand_uint(unsigned bits, unsigned min)
{
   assert(bits >= 4);

   /* Make sure we get some small and large numbers and powers of two every
    * once in a while
    */
   int k = rand() % 64;
   if (k == 17) {
      return min + (rand() % 16);
   } else if (k == 42) {
      return u_uintN_max(bits) - (rand() % 16);
   } else if (k == 9) {
      uint64_t r;
      do {
         r = 1ull << (rand() % bits);
      } while (r < min);
      return r;
   }

   if (min == 0) {
      assert(bits <= 64);
      uint64_t r = 0;
      for (unsigned i = 0; i < 8; i++)
         r |= ((uint64_t)rand() & 0xff) << i * 8;
      return r >> (63 - (rand() % bits));
   } else {
      uint64_t r;
      do {
         r = rand_uint(bits, 0);
      } while (r < min);
      return r;
   }
}

static int64_t
rand_sint(unsigned bits, unsigned min_abs)
{
   /* Make sure we hit the minimum representable value every once in a while */
   if (rand() % 64 == 37)
      return u_intN_min(bits);

   int64_t s = rand_uint(bits - 1, min_abs);
   return rand() & 1 ? s : -s;
}

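/* Reference division at the given bit size; this is the ground truth the
 * fast paths are checked against.
 */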
static uint64_t
udiv(uint64_t a, uint64_t b, unsigned bit_size)
{
   switch (bit_size) {
   case 64: return (uint64_t)a / (uint64_t)b;
   case 32: return (uint32_t)a / (uint32_t)b;
   case 16: return (uint16_t)a / (uint16_t)b;
   case 8:  return (uint8_t)a / (uint8_t)b;
   default:
      assert(!"Invalid bit size");
      return 0;
   }
}

static int64_t
sdiv(int64_t a, int64_t b, unsigned bit_size)
{
   switch (bit_size) {
   case 64: return (int64_t)a / (int64_t)b;
   case 32: return (int32_t)a / (int32_t)b;
   case 16: return (int16_t)a / (int16_t)b;
   case 8:  return (int8_t)a / (int8_t)b;
   default:
      assert(!"Invalid bit size");
      return 0;
   }
}

static void
random_udiv_add_sat_test(unsigned bits, bool bounded)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint64_t n = rand_uint(bits, 0);
      uint64_t d = rand_uint(bits, 2);
      assert(uint_is_in_range(n, bits));
      assert(uint_is_in_range(d, bits) && d >= 2);

      unsigned n_bits = bounded ? util_logbase2_64(MAX2(n, 1)) + 1 : bits;

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, n_bits, bits);
      EXPECT_EQ(fast_udiv_add_sat(n, m, bits), udiv(n, d, bits));
   }
}

static void
random_udiv_mul_add_test(unsigned bits, bool bounded)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint64_t n = rand_uint(bits, 0);
      uint64_t d = rand_uint(bits, 1);
      assert(uint_is_in_range(n, bits));
      assert(uint_is_in_range(d, bits) && d >= 1);

      unsigned n_bits = bounded ? util_logbase2_64(MAX2(n, 1)) + 1 : bits;

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, n_bits, bits);
      EXPECT_EQ(fast_udiv_mul_add(n, m, bits), udiv(n, d, bits));
   }
}

static void
random_sdiv_test(unsigned bits)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      int64_t n = rand_sint(bits, 0);
      int64_t d;
      do {
         d = rand_sint(bits, 2);
      } while (d == INT64_MIN || util_is_power_of_two_or_zero64(llabs(d)));

      assert(sint_is_in_range(n, bits));
      assert(sint_is_in_range(d, bits) && llabs(d) >= 2);

      struct util_fast_sdiv_info m =
         util_compute_fast_sdiv_info(d, bits);
      EXPECT_EQ(fast_sdiv(n, d, m, bits), sdiv(n, d, bits));
   }
}

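/* The tests below exercise util_compute_fast_udiv_info() the way a driver
 * would: compute the magic values once for a known divisor, then replace
 * each division with a multiply-high plus shifts.  A minimal sketch,
 * assuming a hypothetical umul_hi() helper returning the high 32 bits of a
 * 32x32-bit multiply:
 *
 *    struct util_fast_udiv_info m = util_compute_fast_udiv_info(d, 32, 32);
 *    uint32_t q = umul_hi((n >> m.pre_shift) + m.increment, m.multiplier)
 *                 >> m.post_shift;
 *
 * This matches n / d whenever the increment cannot overflow;
 * fast_udiv_add_sat() above models the saturating form, and
 * util_fast_udiv32() provides a ready-made 32-bit version.
 */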
TEST(fast_idiv_by_const, uint8_add_sat)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (unsigned d = 2; d < 256; d++) {
      for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
         struct util_fast_udiv_info m =
            util_compute_fast_udiv_info(d, n_bits, 8);

         for (unsigned n = 0; n < (1u << n_bits); n++)
            EXPECT_EQ(fast_udiv_add_sat(n, m, 8), udiv(n, d, 8));
      }
   }
}

TEST(fast_idiv_by_const, uint8_mul_add)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (unsigned d = 2; d < 256; d++) {
      for (unsigned n_bits = 1; n_bits <= 8; n_bits++) {
         struct util_fast_udiv_info m =
            util_compute_fast_udiv_info(d, n_bits, 8);

         for (unsigned n = 0; n < (1u << n_bits); n++)
            EXPECT_EQ(fast_udiv_mul_add(n, m, 8), udiv(n, d, 8));
      }
   }
}

TEST(fast_idiv_by_const, int8)
{
   /* 8-bit is small enough we can brute-force the entire space */
   for (int n = -128; n < 128; n++) {
      for (int d = -128; d < 128; d++) {
         if (util_is_power_of_two_or_zero(abs(d)))
            continue;

         struct util_fast_sdiv_info m =
            util_compute_fast_sdiv_info(d, 8);
         EXPECT_EQ(fast_sdiv(n, d, m, 8), sdiv(n, d, 8));
      }
   }
}

TEST(fast_idiv_by_const, uint16_add_sat_bounded)
{
   random_udiv_add_sat_test(16, true);
}

TEST(fast_idiv_by_const, uint16_add_sat_full)
{
   random_udiv_add_sat_test(16, false);
}

TEST(fast_idiv_by_const, uint16_mul_add_bounded)
{
   random_udiv_mul_add_test(16, true);
}

TEST(fast_idiv_by_const, uint16_mul_add_full)
{
   random_udiv_mul_add_test(16, false);
}

TEST(fast_idiv_by_const, int16)
{
   random_sdiv_test(16);
}

TEST(fast_idiv_by_const, uint32_add_sat_bounded)
{
   random_udiv_add_sat_test(32, true);
}

TEST(fast_idiv_by_const, uint32_add_sat_full)
{
   random_udiv_add_sat_test(32, false);
}

TEST(fast_idiv_by_const, uint32_mul_add_bounded)
{
   random_udiv_mul_add_test(32, true);
}

TEST(fast_idiv_by_const, uint32_mul_add_full)
{
   random_udiv_mul_add_test(32, false);
}

TEST(fast_idiv_by_const, int32)
{
   random_sdiv_test(32);
}

TEST(fast_idiv_by_const, util_fast_udiv32)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(32, 0);
      uint32_t d = rand_uint(32, 1);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 32, 32);
      EXPECT_EQ(util_fast_udiv32(n, m), n / d);
   }
}

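/* The "nuw" (no unsigned wrap) variant assumes n + increment cannot
 * overflow, so n == UINT32_MAX is skipped below.
 */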
TEST(fast_idiv_by_const, util_fast_udiv32_nuw)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(32, 0);
      if (n == UINT32_MAX)
         continue;
      uint32_t d = rand_uint(32, 1);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 32, 32);
      EXPECT_EQ(util_fast_udiv32_nuw(n, m), n / d);
   }
}

TEST(fast_idiv_by_const, util_fast_udiv32_u31_d_not_one)
{
   for (unsigned i = 0; i < RAND_TEST_ITERATIONS; i++) {
      uint32_t n = rand_uint(31, 0);
      uint32_t d = rand_uint(31, 2);

      struct util_fast_udiv_info m =
         util_compute_fast_udiv_info(d, 31, 32);
      EXPECT_EQ(util_fast_udiv32_u31_d_not_one(n, m), n / d);
   }
}

TEST(fast_idiv_by_const, uint64_add_sat_bounded)
{
   random_udiv_add_sat_test(64, true);
}

TEST(fast_idiv_by_const, uint64_add_sat_full)
{
   random_udiv_add_sat_test(64, false);
}

TEST(fast_idiv_by_const, uint64_mul_add_bounded)
{
   random_udiv_mul_add_test(64, true);
}

TEST(fast_idiv_by_const, uint64_mul_add_full)
{
   random_udiv_mul_add_test(64, false);
}

TEST(fast_idiv_by_const, int64)
{
   random_sdiv_test(64);
}