1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <cmath>
10 #include <cstddef>
11 #include <cstdlib>
12
13 #include <gtest/gtest.h>
14
15 #include <xnnpack/common.h>
16
17 #include <xnnpack/requantization-stubs.h>
18 #include "requantization-tester.h"
19
20
/*
 * Precise scalar implementation using unsigned 32-bit arithmetic.
 */
24
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__scalar_unsigned32
// for each s in [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_unsigned32);
  }
}
34
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__scalar_unsigned32
// for every (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_unsigned32);
    }
  }
}
47
// Runs TestDivideByPO2WithRoundingUp on
// xnn_qu8_requantize_precise__scalar_unsigned32 for every (zero_point, s) pair
// with zero_point in [0, 255] and s in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__scalar_unsigned32);
    }
  }
}
60
// Runs TestDivideByPO2WithRoundingDown on
// xnn_qu8_requantize_precise__scalar_unsigned32 for every (zero_point, s) pair
// with zero_point in [0, 255] and s in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_down) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__scalar_unsigned32);
    }
  }
}
73
// Runs TestDivideByPO2WithRoundingAway on
// xnn_qu8_requantize_precise__scalar_unsigned32 for every (zero_point, s) pair
// with zero_point in [0, 255] and s in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_away) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__scalar_unsigned32);
    }
  }
}
86
// Runs TestSpecialCases on xnn_qu8_requantize_precise__scalar_unsigned32 with
// qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_precise__scalar_unsigned32);
}
93
// Runs TestRandomCasesPrecise on xnn_qu8_requantize_precise__scalar_unsigned32:
// zero point 128, 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qu8_requantize_precise__scalar_unsigned32);
}
102
103
/*
 * Precise scalar implementation using unsigned 64-bit arithmetic.
 */
107
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__scalar_unsigned64
// for each s in [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_unsigned64);
  }
}
117
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__scalar_unsigned64
// for every (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_unsigned64);
    }
  }
}
130
// Runs TestDivideByPO2WithRoundingUp on
// xnn_qu8_requantize_precise__scalar_unsigned64 for every (zero_point, s) pair
// with zero_point in [0, 255] and s in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__scalar_unsigned64);
    }
  }
}
143
// Runs TestDivideByPO2WithRoundingDown on
// xnn_qu8_requantize_precise__scalar_unsigned64 for every (zero_point, s) pair
// with zero_point in [0, 255] and s in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_down) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__scalar_unsigned64);
    }
  }
}
156
// Runs TestDivideByPO2WithRoundingAway on
// xnn_qu8_requantize_precise__scalar_unsigned64 for every (zero_point, s) pair
// with zero_point in [0, 255] and s in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_away) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__scalar_unsigned64);
    }
  }
}
169
// Runs TestSpecialCases on xnn_qu8_requantize_precise__scalar_unsigned64 with
// qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_precise__scalar_unsigned64);
}
176
// Runs TestRandomCasesPrecise on xnn_qu8_requantize_precise__scalar_unsigned64:
// zero point 128, 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qu8_requantize_precise__scalar_unsigned64);
}
185
186
/*
 * Precise scalar implementation using signed 64-bit arithmetic.
 */
190
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__scalar_signed64
// for each s in [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_signed64);
  }
}
200
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__scalar_signed64
// for every (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_signed64);
    }
  }
}
213
// Runs TestDivideByPO2WithRoundingUp on
// xnn_qu8_requantize_precise__scalar_signed64 for every (zero_point, s) pair
// with zero_point in [0, 255] and s in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__scalar_signed64);
    }
  }
}
226
// Runs TestDivideByPO2WithRoundingDown on
// xnn_qu8_requantize_precise__scalar_signed64 for every (zero_point, s) pair
// with zero_point in [0, 255] and s in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_down) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__scalar_signed64);
    }
  }
}
239
// Runs TestDivideByPO2WithRoundingAway on
// xnn_qu8_requantize_precise__scalar_signed64 for every (zero_point, s) pair
// with zero_point in [0, 255] and s in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_away) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__scalar_signed64);
    }
  }
}
252
// Runs TestSpecialCases on xnn_qu8_requantize_precise__scalar_signed64 with
// qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_precise__scalar_signed64);
}
259
// Runs TestRandomCasesPrecise on xnn_qu8_requantize_precise__scalar_signed64:
// zero point 128, 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qu8_requantize_precise__scalar_signed64);
}
268
269
270 /*
271 * FP32-based scalar implementation using lrintf function.
272 */
273
// Runs TestRandomCasesApproximate on xnn_qu8_requantize_fp32__scalar_lrintf:
// 1000 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_FP32__SCALAR_LRINTF, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__scalar_lrintf);
}
281
282
283 /*
284 * FP32-based scalar implementation using magic trick for FP32->INT32 conversion.
285 */
286
// Runs TestRandomCasesApproximate on xnn_qu8_requantize_fp32__scalar_magic:
// 1000 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_FP32__SCALAR_MAGIC, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__scalar_magic);
}
294
295
296 /*
297 * Q31-based scalar implementation.
298 */
299
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_q31__scalar for each s in
// [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__SCALAR, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__scalar);
  }
}
309
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_q31__scalar for every
// (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_Q31__SCALAR, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_q31__scalar);
    }
  }
}
322
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_q31__scalar for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_Q31__SCALAR, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__scalar);
    }
  }
}
335
336 /* No rounding down test - it fails because of upward bias in multiplication */
337 /* No rounding away test - it fails because of upward bias in multiplication */
338
// Runs TestSpecialCases on xnn_qu8_requantize_q31__scalar with qmin/qmax set
// to the uint8_t limits.
TEST(QU8_Q31__SCALAR, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_q31__scalar);
}
345
// Runs TestRandomCasesApproximate on xnn_qu8_requantize_q31__scalar:
// 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__SCALAR, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_q31__scalar);
}
353
354
355 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
356 /*
357 * Precise SSE2 implementation using floating-point shuffle.
358 */
359
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__sse2 for each s in
// [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SSE2, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__sse2);
  }
}
369
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__sse2 for every
// (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSE2, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_precise__sse2);
    }
  }
}
382
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_precise__sse2 for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSE2, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__sse2);
    }
  }
}
395
// Runs TestDivideByPO2WithRoundingDown on xnn_qu8_requantize_precise__sse2 for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSE2, divide_by_po2_with_rounding_down) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__sse2);
    }
  }
}
408
// Runs TestDivideByPO2WithRoundingAway on xnn_qu8_requantize_precise__sse2 for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSE2, divide_by_po2_with_rounding_away) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__sse2);
    }
  }
}
421
// Runs TestSpecialCases on xnn_qu8_requantize_precise__sse2 with qmin/qmax set
// to the uint8_t limits.
TEST(QU8_PRECISE__SSE2, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_precise__sse2);
}
428
// Runs TestRandomCasesPrecise on xnn_qu8_requantize_precise__sse2:
// zero point 128, 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SSE2, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qu8_requantize_precise__sse2);
}
437
438
439 /*
440 * Precise SSSE3 implementation using floating-point shuffle.
441 */
442
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__ssse3 for each s in
// [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__ssse3);
  }
}
452
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__ssse3 for every
// (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_precise__ssse3);
    }
  }
}
465
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_precise__ssse3 for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__ssse3);
    }
  }
}
478
// Runs TestDivideByPO2WithRoundingDown on xnn_qu8_requantize_precise__ssse3 for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, divide_by_po2_with_rounding_down) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__ssse3);
    }
  }
}
491
// Runs TestDivideByPO2WithRoundingAway on xnn_qu8_requantize_precise__ssse3 for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, divide_by_po2_with_rounding_away) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__ssse3);
    }
  }
}
504
// Runs TestSpecialCases on xnn_qu8_requantize_precise__ssse3 with qmin/qmax
// set to the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_precise__ssse3);
}
511
// Runs TestRandomCasesPrecise on xnn_qu8_requantize_precise__ssse3:
// zero point 128, 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qu8_requantize_precise__ssse3);
}
520
521
522 /*
523 * Precise SSE4.1 implementation using static blend instruction.
524 */
525
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__sse4 for each s in
// [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SSE4, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__sse4);
  }
}
535
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__sse4 for every
// (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSE4, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_precise__sse4);
    }
  }
}
548
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_precise__sse4 for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSE4, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__sse4);
    }
  }
}
561
// Runs TestDivideByPO2WithRoundingDown on xnn_qu8_requantize_precise__sse4 for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSE4, divide_by_po2_with_rounding_down) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__sse4);
    }
  }
}
574
// Runs TestDivideByPO2WithRoundingAway on xnn_qu8_requantize_precise__sse4 for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__SSE4, divide_by_po2_with_rounding_away) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__sse4);
    }
  }
}
587
// Runs TestSpecialCases on xnn_qu8_requantize_precise__sse4 with qmin/qmax set
// to the uint8_t limits.
TEST(QU8_PRECISE__SSE4, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_precise__sse4);
}
594
// Runs TestRandomCasesPrecise on xnn_qu8_requantize_precise__sse4:
// zero point 128, 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__SSE4, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qu8_requantize_precise__sse4);
}
603
604
605 /*
606 * FP32-based x86 SSE2 implementation.
607 */
608
// Runs TestRandomCasesApproximate on xnn_qu8_requantize_fp32__sse2:
// 1000 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_FP32__SSE2, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__sse2);
}
616
617
618 /*
619 * Q31-based x86 SSE2 implementation.
620 */
621
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_q31__sse2 for each s in
// [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__SSE2, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__sse2);
  }
}
631
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_q31__sse2 for every
// (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_Q31__SSE2, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_q31__sse2);
    }
  }
}
644
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_q31__sse2 for every
// (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_Q31__SSE2, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__sse2);
    }
  }
}
657
658 /* No rounding down test - it fails because of upward bias in multiplication */
659 /* No rounding away test - it fails because of upward bias in multiplication */
660
// Runs TestSpecialCases on xnn_qu8_requantize_q31__sse2 with qmin/qmax set to
// the uint8_t limits.
TEST(QU8_Q31__SSE2, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_q31__sse2);
}
667
// Runs TestRandomCasesApproximate on xnn_qu8_requantize_q31__sse2:
// 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__SSE2, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_q31__sse2);
}
675
676
677 /*
678 * Q31-based x86 SSSE3 implementation.
679 */
680
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_q31__ssse3 for each s in
// [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__SSSE3, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__ssse3);
  }
}
690
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_q31__ssse3 for every
// (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_Q31__SSSE3, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_q31__ssse3);
    }
  }
}
703
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_q31__ssse3 for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_Q31__SSSE3, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__ssse3);
    }
  }
}
716
717 /* No rounding down test - it fails because of upward bias in multiplication */
718 /* No rounding away test - it fails because of upward bias in multiplication */
719
// Runs TestSpecialCases on xnn_qu8_requantize_q31__ssse3 with qmin/qmax set to
// the uint8_t limits.
TEST(QU8_Q31__SSSE3, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_q31__ssse3);
}
726
// Runs TestRandomCasesApproximate on xnn_qu8_requantize_q31__ssse3:
// 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__SSSE3, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_q31__ssse3);
}
734
735
736 /*
737 * Q31-based x86 SSE4 implementation.
738 */
739
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_q31__sse4 for each s in
// [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__SSE4, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__sse4);
  }
}
749
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_q31__sse4 for every
// (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_Q31__SSE4, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_q31__sse4);
    }
  }
}
762
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_q31__sse4 for every
// (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_Q31__SSE4, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__sse4);
    }
  }
}
775
776 /* No rounding down test - it fails because of upward bias in multiplication */
777 /* No rounding away test - it fails because of upward bias in multiplication */
778
// Runs TestSpecialCases on xnn_qu8_requantize_q31__sse4 with qmin/qmax set to
// the uint8_t limits.
TEST(QU8_Q31__SSE4, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_q31__sse4);
}
785
// Runs TestRandomCasesApproximate on xnn_qu8_requantize_q31__sse4:
// 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__SSE4, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_q31__sse4);
}
793 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
794
795 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
796 /*
797 * Precise ARM NEON implementation.
798 */
799
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__neon for each s in
// [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__NEON, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__neon);
  }
}
809
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_precise__neon for every
// (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__NEON, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_precise__neon);
    }
  }
}
822
// Runs TestDivideByPO2WithRoundingUp on xnn_qu8_requantize_precise__neon for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__NEON, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__neon);
    }
  }
}
835
// Runs TestDivideByPO2WithRoundingDown on xnn_qu8_requantize_precise__neon for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__NEON, divide_by_po2_with_rounding_down) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__neon);
    }
  }
}
848
// Runs TestDivideByPO2WithRoundingAway on xnn_qu8_requantize_precise__neon for
// every (zero_point, s) pair with zero_point in [0, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_PRECISE__NEON, divide_by_po2_with_rounding_away) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__neon);
    }
  }
}
861
// Runs TestSpecialCases on xnn_qu8_requantize_precise__neon with qmin/qmax set
// to the uint8_t limits.
TEST(QU8_PRECISE__NEON, special_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qu8_requantize_precise__neon);
}
868
// Runs TestRandomCasesPrecise on xnn_qu8_requantize_precise__neon:
// zero point 128, 100 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_PRECISE__NEON, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .zero_point(128)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qu8_requantize_precise__neon);
}
877
878
879 /*
880 * FP32-based ARM NEON implementation.
881 */
882
// Runs TestRandomCasesApproximate on xnn_qu8_requantize_fp32__neon:
// 1000 iterations, qmin/qmax set to the uint8_t limits.
TEST(QU8_FP32__NEON, random_cases) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__neon);
}
890
891
892 /*
893 * Q31-based ARM NEON implementation.
894 */
895
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_q31__neon for each s in
// [1, 31], with qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__NEON, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__neon);
  }
}
905
// Runs TestExactDivideByPO2 on xnn_qu8_requantize_q31__neon for every
// (zero_point, s) pair with zero_point in [1, 255] and s in [1, 31];
// qmin/qmax are set to the uint8_t limits.
TEST(QU8_Q31__NEON, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qu8_requantize_q31__neon);
    }
  }
}
918
// Runs RequantizationTester::TestDivideByPO2WithRoundingUp on
// xnn_qu8_requantize_q31__neon for every zero point in [0, 255] combined with
// every `s` in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_Q31__NEON, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t s_param = 1; s_param <= 31; ++s_param) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(s_param)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__neon);
    }
  }
}
931
932 /* No rounding down test - it fails because of upward bias in multiplication */
933 /* No rounding away test - it fails because of upward bias in multiplication */
934
// Runs RequantizationTester::TestSpecialCases on xnn_qu8_requantize_q31__neon
// with qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__NEON, special_cases) {
  using U8Limits = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8Limits::min())
      .qmax(U8Limits::max())
      .TestSpecialCases(xnn_qu8_requantize_q31__neon);
}
941
// Runs RequantizationTester::TestRandomCasesApproximate on
// xnn_qu8_requantize_q31__neon for 100 iterations, with qmin/qmax set to the
// uint8_t limits.
TEST(QU8_Q31__NEON, random_cases) {
  using U8Limits = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8Limits::min())
      .qmax(U8Limits::max())
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_q31__neon);
}
949 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
950
951 #if XNN_ARCH_WASMSIMD
/*
 * FP32-based WAsm SIMD implementation.
 */
955
// Runs RequantizationTester::TestRandomCasesApproximate on
// xnn_qu8_requantize_fp32__wasmsimd for 1000 iterations, with qmin/qmax set
// to the uint8_t limits.
TEST(QU8_FP32__WASMSIMD, random_cases) {
  using U8Limits = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8Limits::min())
      .qmax(U8Limits::max())
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__wasmsimd);
}
963
964
/*
 * Q31-based WAsm SIMD implementation.
 */
968
// Runs RequantizationTester::TestExactDivideByPO2 on
// xnn_qu8_requantize_q31__wasmsimd for every `s` in [1, 31]; the zero point
// is left at the tester's default and qmin/qmax are the uint8_t limits.
TEST(QU8_Q31__WASMSIMD, exact_divide_by_po2) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (uint32_t s_param = 1; s_param <= 31; ++s_param) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(s_param)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__wasmsimd);
  }
}
978
// Runs RequantizationTester::TestExactDivideByPO2 on
// xnn_qu8_requantize_q31__wasmsimd for every zero point in [1, 255] combined
// with every `s` in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_Q31__WASMSIMD, exact_divide_by_po2_with_zero_point) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t s_param = 1; s_param <= 31; ++s_param) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(s_param)
          .TestExactDivideByPO2(xnn_qu8_requantize_q31__wasmsimd);
    }
  }
}
991
// Runs RequantizationTester::TestDivideByPO2WithRoundingUp on
// xnn_qu8_requantize_q31__wasmsimd for every zero point in [0, 255] combined
// with every `s` in [1, 31]; qmin/qmax are the uint8_t limits.
TEST(QU8_Q31__WASMSIMD, divide_by_po2_with_rounding_up) {
  constexpr uint8_t kQMin = std::numeric_limits<uint8_t>::min();
  constexpr uint8_t kQMax = std::numeric_limits<uint8_t>::max();
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t s_param = 1; s_param <= 31; ++s_param) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(s_param)
          .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__wasmsimd);
    }
  }
}
1004
1005 /* No rounding down test - it fails because of upward bias in multiplication */
1006 /* No rounding away test - it fails because of upward bias in multiplication */
1007
// Runs RequantizationTester::TestSpecialCases on
// xnn_qu8_requantize_q31__wasmsimd with qmin/qmax set to the uint8_t limits.
TEST(QU8_Q31__WASMSIMD, special_cases) {
  using U8Limits = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8Limits::min())
      .qmax(U8Limits::max())
      .TestSpecialCases(xnn_qu8_requantize_q31__wasmsimd);
}
1014
// Runs RequantizationTester::TestRandomCasesApproximate on
// xnn_qu8_requantize_q31__wasmsimd for 100 iterations, with qmin/qmax set to
// the uint8_t limits.
TEST(QU8_Q31__WASMSIMD, random_cases) {
  using U8Limits = std::numeric_limits<uint8_t>;
  RequantizationTester()
      .qmin(U8Limits::min())
      .qmax(U8Limits::max())
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qu8_requantize_q31__wasmsimd);
}
1022 #endif // XNN_ARCH_WASMSIMD
1023