• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2020 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <cmath>
10 #include <cstddef>
11 #include <cstdlib>
12 
13 #include <gtest/gtest.h>
14 
15 #include <xnnpack/common.h>
16 
17 #include <xnnpack/requantization-stubs.h>
18 #include "requantization-tester.h"
19 
20 
/*
 * Precise scalar implementation using unsigned 32-bit arithmetic.
 */
24 
// Exact division by a power of two: invokes the tester's TestExactDivideByPO2
// on xnn_qs8_requantize_precise__scalar_unsigned32 for each shift s = 1..31,
// with qmin/qmax set to the int8_t limits and no zero point set explicitly.
TEST(QS8_PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_unsigned32);
  }
}
34 
// Exact power-of-two division check, swept over every zero point in the
// int8_t range and every shift s = 1..31, for
// xnn_qs8_requantize_precise__scalar_unsigned32.
TEST(QS8_PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_unsigned32);
    }
  }
}
50 
// Power-of-two division with rounding up: invokes the tester's
// TestDivideByPO2WithRoundingUp for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__scalar_unsigned32.
TEST(QS8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_up) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__scalar_unsigned32);
    }
  }
}
66 
// Power-of-two division with rounding down: invokes the tester's
// TestDivideByPO2WithRoundingDown for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__scalar_unsigned32.
TEST(QS8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_down) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__scalar_unsigned32);
    }
  }
}
82 
// Power-of-two division with rounding away: invokes the tester's
// TestDivideByPO2WithRoundingAway for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__scalar_unsigned32.
TEST(QS8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_away) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__scalar_unsigned32);
    }
  }
}
98 
// Runs the tester's TestSpecialCases on
// xnn_qs8_requantize_precise__scalar_unsigned32 with qmin/qmax set to the
// int8_t limits.
TEST(QS8_PRECISE__SCALAR_UNSIGNED32, special_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qs8_requantize_precise__scalar_unsigned32);
}
105 
// Runs the tester's TestRandomCasesPrecise on
// xnn_qs8_requantize_precise__scalar_unsigned32 with iterations set to 100 and
// qmin/qmax set to the int8_t limits.
TEST(QS8_PRECISE__SCALAR_UNSIGNED32, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qs8_requantize_precise__scalar_unsigned32);
}
113 
114 
/*
 * Precise scalar implementation using unsigned 64-bit arithmetic.
 */
118 
// Exact division by a power of two: invokes the tester's TestExactDivideByPO2
// on xnn_qs8_requantize_precise__scalar_unsigned64 for each shift s = 1..31,
// with qmin/qmax set to the int8_t limits and no zero point set explicitly.
TEST(QS8_PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_unsigned64);
  }
}
128 
// Exact power-of-two division check, swept over every zero point in the
// int8_t range and every shift s = 1..31, for
// xnn_qs8_requantize_precise__scalar_unsigned64.
TEST(QS8_PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_unsigned64);
    }
  }
}
144 
// Power-of-two division with rounding up: invokes the tester's
// TestDivideByPO2WithRoundingUp for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__scalar_unsigned64.
TEST(QS8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_up) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__scalar_unsigned64);
    }
  }
}
160 
// Power-of-two division with rounding down: invokes the tester's
// TestDivideByPO2WithRoundingDown for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__scalar_unsigned64.
TEST(QS8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_down) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__scalar_unsigned64);
    }
  }
}
176 
// Power-of-two division with rounding away: invokes the tester's
// TestDivideByPO2WithRoundingAway for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__scalar_unsigned64.
TEST(QS8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_away) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__scalar_unsigned64);
    }
  }
}
192 
// Runs the tester's TestSpecialCases on
// xnn_qs8_requantize_precise__scalar_unsigned64 with qmin/qmax set to the
// int8_t limits.
TEST(QS8_PRECISE__SCALAR_UNSIGNED64, special_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qs8_requantize_precise__scalar_unsigned64);
}
199 
// Runs the tester's TestRandomCasesPrecise on
// xnn_qs8_requantize_precise__scalar_unsigned64 with iterations set to 100 and
// qmin/qmax set to the int8_t limits.
TEST(QS8_PRECISE__SCALAR_UNSIGNED64, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qs8_requantize_precise__scalar_unsigned64);
}
207 
208 
/*
 * Precise scalar implementation using signed 64-bit arithmetic.
 */
212 
// Exact division by a power of two: invokes the tester's TestExactDivideByPO2
// on xnn_qs8_requantize_precise__scalar_signed64 for each shift s = 1..31,
// with qmin/qmax set to the int8_t limits and no zero point set explicitly.
TEST(QS8_PRECISE__SCALAR_SIGNED64, exact_divide_by_po2) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_signed64);
  }
}
222 
// Exact power-of-two division check, swept over every zero point in the
// int8_t range and every shift s = 1..31, for
// xnn_qs8_requantize_precise__scalar_signed64.
TEST(QS8_PRECISE__SCALAR_SIGNED64, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qs8_requantize_precise__scalar_signed64);
    }
  }
}
238 
// Power-of-two division with rounding up: invokes the tester's
// TestDivideByPO2WithRoundingUp for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__scalar_signed64.
TEST(QS8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_up) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__scalar_signed64);
    }
  }
}
254 
// Power-of-two division with rounding down: invokes the tester's
// TestDivideByPO2WithRoundingDown for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__scalar_signed64.
TEST(QS8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_down) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__scalar_signed64);
    }
  }
}
270 
// Power-of-two division with rounding away: invokes the tester's
// TestDivideByPO2WithRoundingAway for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__scalar_signed64.
TEST(QS8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_away) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__scalar_signed64);
    }
  }
}
286 
// Runs the tester's TestSpecialCases on
// xnn_qs8_requantize_precise__scalar_signed64 with qmin/qmax set to the
// int8_t limits.
TEST(QS8_PRECISE__SCALAR_SIGNED64, special_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qs8_requantize_precise__scalar_signed64);
}
293 
// Runs the tester's TestRandomCasesPrecise on
// xnn_qs8_requantize_precise__scalar_signed64 with iterations set to 100 and
// qmin/qmax set to the int8_t limits.
TEST(QS8_PRECISE__SCALAR_SIGNED64, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qs8_requantize_precise__scalar_signed64);
}
301 
302 
303 /*
304  * FP32-based scalar implementation using lrintf function.
305  */
306 
// Runs the tester's TestRandomCasesApproximate on
// xnn_qs8_requantize_fp32__scalar_lrintf with iterations set to 1000 and
// qmin/qmax set to the int8_t limits.
TEST(QS8_FP32__SCALAR_LRINTF, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__scalar_lrintf);
}
314 
315 
316 /*
317  * FP32-based scalar implementation using magic trick for FP32->INT32 conversion.
318  */
319 
// Runs the tester's TestRandomCasesApproximate on
// xnn_qs8_requantize_fp32__scalar_magic with iterations set to 1000 and
// qmin/qmax set to the int8_t limits.
TEST(QS8_FP32__SCALAR_MAGIC, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__scalar_magic);
}
327 
328 
329 /*
330  * Q31-based scalar implementation.
331  */
332 
// Exact division by a power of two: invokes the tester's TestExactDivideByPO2
// on xnn_qs8_requantize_q31__scalar for each shift s = 1..31, with qmin/qmax
// set to the int8_t limits and no zero point set explicitly.
TEST(QS8_Q31__SCALAR, exact_divide_by_po2) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_q31__scalar);
  }
}
342 
// Exact power-of-two division check, swept over every zero point in the
// int8_t range and every shift s = 1..31, for xnn_qs8_requantize_q31__scalar.
TEST(QS8_Q31__SCALAR, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qs8_requantize_q31__scalar);
    }
  }
}
358 
// Power-of-two division with rounding up: invokes the tester's
// TestDivideByPO2WithRoundingUp for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_q31__scalar.
TEST(QS8_Q31__SCALAR, divide_by_po2_with_rounding_up) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__scalar);
    }
  }
}
374 
375 /* No rounding down test - it fails because of upward bias in multiplication */
376 /* No rounding away test - it fails because of upward bias in multiplication */
377 
// Runs the tester's TestSpecialCases on xnn_qs8_requantize_q31__scalar with
// qmin/qmax set to the int8_t limits.
TEST(QS8_Q31__SCALAR, special_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qs8_requantize_q31__scalar);
}
384 
// Runs the tester's TestRandomCasesApproximate on
// xnn_qs8_requantize_q31__scalar with iterations set to 100 and qmin/qmax set
// to the int8_t limits.
TEST(QS8_Q31__SCALAR, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qs8_requantize_q31__scalar);
}
392 
393 
394 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
395   /*
396    * Precise SSE2 implementation using floating-point shuffle.
397    */
398 
// Exact division by a power of two: invokes the tester's TestExactDivideByPO2
// on xnn_qs8_requantize_precise__sse2 for each shift s = 1..31, with qmin/qmax
// set to the int8_t limits and no zero point set explicitly.
TEST(QS8_PRECISE__SSE2, exact_divide_by_po2) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_precise__sse2);
  }
}
408 
// Exact power-of-two division check, swept over every zero point in the
// int8_t range and every shift s = 1..31, for
// xnn_qs8_requantize_precise__sse2.
TEST(QS8_PRECISE__SSE2, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qs8_requantize_precise__sse2);
    }
  }
}
424 
// Power-of-two division with rounding up: invokes the tester's
// TestDivideByPO2WithRoundingUp for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__sse2.
TEST(QS8_PRECISE__SSE2, divide_by_po2_with_rounding_up) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__sse2);
    }
  }
}
440 
// Power-of-two division with rounding down: invokes the tester's
// TestDivideByPO2WithRoundingDown for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__sse2.
TEST(QS8_PRECISE__SSE2, divide_by_po2_with_rounding_down) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__sse2);
    }
  }
}
456 
// Power-of-two division with rounding away: invokes the tester's
// TestDivideByPO2WithRoundingAway for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__sse2.
TEST(QS8_PRECISE__SSE2, divide_by_po2_with_rounding_away) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__sse2);
    }
  }
}
472 
// Runs the tester's TestSpecialCases on xnn_qs8_requantize_precise__sse2 with
// qmin/qmax set to the int8_t limits.
TEST(QS8_PRECISE__SSE2, special_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qs8_requantize_precise__sse2);
}
479 
// Runs the tester's TestRandomCasesPrecise on
// xnn_qs8_requantize_precise__sse2 with iterations set to 100 and qmin/qmax
// set to the int8_t limits.
TEST(QS8_PRECISE__SSE2, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qs8_requantize_precise__sse2);
}
487 
488 
489   /*
490    * Precise SSSE3 implementation using floating-point shuffle.
491    */
492 
// Exact division by a power of two: invokes the tester's TestExactDivideByPO2
// on xnn_qs8_requantize_precise__ssse3 for each shift s = 1..31, with
// qmin/qmax set to the int8_t limits and no zero point set explicitly.
TEST(QS8_PRECISE__SSSE3, exact_divide_by_po2) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_precise__ssse3);
  }
}
502 
// Exact power-of-two division check, swept over every zero point in the
// int8_t range and every shift s = 1..31, for
// xnn_qs8_requantize_precise__ssse3.
TEST(QS8_PRECISE__SSSE3, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qs8_requantize_precise__ssse3);
    }
  }
}
518 
// Power-of-two division with rounding up: invokes the tester's
// TestDivideByPO2WithRoundingUp for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__ssse3.
TEST(QS8_PRECISE__SSSE3, divide_by_po2_with_rounding_up) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__ssse3);
    }
  }
}
534 
// Power-of-two division with rounding down: invokes the tester's
// TestDivideByPO2WithRoundingDown for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__ssse3.
TEST(QS8_PRECISE__SSSE3, divide_by_po2_with_rounding_down) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__ssse3);
    }
  }
}
550 
// Power-of-two division with rounding away: invokes the tester's
// TestDivideByPO2WithRoundingAway for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__ssse3.
TEST(QS8_PRECISE__SSSE3, divide_by_po2_with_rounding_away) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__ssse3);
    }
  }
}
566 
// Runs the tester's TestSpecialCases on xnn_qs8_requantize_precise__ssse3 with
// qmin/qmax set to the int8_t limits.
TEST(QS8_PRECISE__SSSE3, special_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qs8_requantize_precise__ssse3);
}
573 
// Runs the tester's TestRandomCasesPrecise on
// xnn_qs8_requantize_precise__ssse3 with iterations set to 100 and qmin/qmax
// set to the int8_t limits.
TEST(QS8_PRECISE__SSSE3, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qs8_requantize_precise__ssse3);
}
581 
582 
583   /*
584    * Precise SSE4.1 implementation using static blend instruction.
585    */
586 
// Exact division by a power of two: invokes the tester's TestExactDivideByPO2
// on xnn_qs8_requantize_precise__sse4 for each shift s = 1..31, with qmin/qmax
// set to the int8_t limits and no zero point set explicitly.
TEST(QS8_PRECISE__SSE4, exact_divide_by_po2) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_precise__sse4);
  }
}
596 
// Exact power-of-two division check, swept over every zero point in the
// int8_t range and every shift s = 1..31, for
// xnn_qs8_requantize_precise__sse4.
TEST(QS8_PRECISE__SSE4, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qs8_requantize_precise__sse4);
    }
  }
}
612 
// Power-of-two division with rounding up: invokes the tester's
// TestDivideByPO2WithRoundingUp for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__sse4.
TEST(QS8_PRECISE__SSE4, divide_by_po2_with_rounding_up) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__sse4);
    }
  }
}
628 
// Power-of-two division with rounding down: invokes the tester's
// TestDivideByPO2WithRoundingDown for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__sse4.
TEST(QS8_PRECISE__SSE4, divide_by_po2_with_rounding_down) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__sse4);
    }
  }
}
644 
// Power-of-two division with rounding away: invokes the tester's
// TestDivideByPO2WithRoundingAway for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_precise__sse4.
TEST(QS8_PRECISE__SSE4, divide_by_po2_with_rounding_away) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__sse4);
    }
  }
}
660 
// Runs the tester's TestSpecialCases on xnn_qs8_requantize_precise__sse4 with
// qmin/qmax set to the int8_t limits.
TEST(QS8_PRECISE__SSE4, special_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qs8_requantize_precise__sse4);
}
667 
// Runs the tester's TestRandomCasesPrecise on
// xnn_qs8_requantize_precise__sse4 with iterations set to 100 and qmin/qmax
// set to the int8_t limits.
TEST(QS8_PRECISE__SSE4, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesPrecise(xnn_qs8_requantize_precise__sse4);
}
675 
676 
677   /*
678    * FP32-based x86 SSE2 implementation.
679    */
680 
// Runs the tester's TestRandomCasesApproximate on
// xnn_qs8_requantize_fp32__sse2 with iterations set to 1000 and qmin/qmax set
// to the int8_t limits.
TEST(QS8_FP32__SSE2, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__sse2);
}
688 
689 
690   /*
691    * FP32-based x86 SSE4 implementation.
692    */
693 
// Runs the tester's TestRandomCasesApproximate on
// xnn_qs8_requantize_fp32__sse4 with iterations set to 1000 and qmin/qmax set
// to the int8_t limits.
TEST(QS8_FP32__SSE4, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(1000)
      .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__sse4);
}
701 
702 
703   /*
704    * Q31-based x86 SSE2 implementation.
705    */
706 
// Exact division by a power of two: invokes the tester's TestExactDivideByPO2
// on xnn_qs8_requantize_q31__sse2 for each shift s = 1..31, with qmin/qmax set
// to the int8_t limits and no zero point set explicitly.
TEST(QS8_Q31__SSE2, exact_divide_by_po2) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_q31__sse2);
  }
}
716 
// Exact power-of-two division check, swept over every zero point in the
// int8_t range and every shift s = 1..31, for xnn_qs8_requantize_q31__sse2.
TEST(QS8_Q31__SSE2, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qs8_requantize_q31__sse2);
    }
  }
}
732 
// Power-of-two division with rounding up: invokes the tester's
// TestDivideByPO2WithRoundingUp for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_q31__sse2.
TEST(QS8_Q31__SSE2, divide_by_po2_with_rounding_up) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__sse2);
    }
  }
}
748 
749   /* No rounding down test - it fails because of upward bias in multiplication */
750   /* No rounding away test - it fails because of upward bias in multiplication */
751 
// Runs the tester's TestSpecialCases on xnn_qs8_requantize_q31__sse2 with
// qmin/qmax set to the int8_t limits.
TEST(QS8_Q31__SSE2, special_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qs8_requantize_q31__sse2);
}
758 
// Runs the tester's TestRandomCasesApproximate on
// xnn_qs8_requantize_q31__sse2 with iterations set to 100 and qmin/qmax set to
// the int8_t limits.
TEST(QS8_Q31__SSE2, random_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .iterations(100)
      .TestRandomCasesApproximate(xnn_qs8_requantize_q31__sse2);
}
766 
767 
768   /*
769    * Q31-based x86 SSSE3 implementation.
770    */
771 
// Exact division by a power of two: invokes the tester's TestExactDivideByPO2
// on xnn_qs8_requantize_q31__ssse3 for each shift s = 1..31, with qmin/qmax
// set to the int8_t limits and no zero point set explicitly.
TEST(QS8_Q31__SSSE3, exact_divide_by_po2) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
        .qmin(kQMin)
        .qmax(kQMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_q31__ssse3);
  }
}
781 
// Exact power-of-two division check, swept over every zero point in the
// int8_t range and every shift s = 1..31, for xnn_qs8_requantize_q31__ssse3.
TEST(QS8_Q31__SSSE3, exact_divide_by_po2_with_zero_point) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestExactDivideByPO2(xnn_qs8_requantize_q31__ssse3);
    }
  }
}
797 
// Power-of-two division with rounding up: invokes the tester's
// TestDivideByPO2WithRoundingUp for every zero point in the int8_t range and
// every shift s = 1..31, for xnn_qs8_requantize_q31__ssse3.
TEST(QS8_Q31__SSSE3, divide_by_po2_with_rounding_up) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  for (int32_t zp = kQMin; zp <= kQMax; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
          .zero_point(zp)
          .qmin(kQMin)
          .qmax(kQMax)
          .s(shift)
          .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__ssse3);
    }
  }
}
813 
814   /* No rounding down test - it fails because of upward bias in multiplication */
815   /* No rounding away test - it fails because of upward bias in multiplication */
816 
// Runs the tester's TestSpecialCases on xnn_qs8_requantize_q31__ssse3 with
// qmin/qmax set to the int8_t limits.
TEST(QS8_Q31__SSSE3, special_cases) {
  constexpr int8_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int8_t kQMax = std::numeric_limits<int8_t>::max();
  RequantizationTester()
      .qmin(kQMin)
      .qmax(kQMax)
      .TestSpecialCases(xnn_qs8_requantize_q31__ssse3);
}
823 
// Runs TestRandomCasesApproximate against the Q31 SSSE3 kernel, configured
// with 100 iterations and the int8_t output range.
TEST(QS8_Q31__SSSE3, random_cases) {
  using Int8Limits = std::numeric_limits<int8_t>;
  RequantizationTester()
    .qmin(Int8Limits::min())
    .qmax(Int8Limits::max())
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qs8_requantize_q31__ssse3);
}
831 
832 
833   /*
834    * Q31-based x86 SSE4 implementation.
835    */
836 
// Runs TestExactDivideByPO2 against the Q31 SSE4 kernel once per shift
// amount s in [1, 31], using the int8_t output range.
TEST(QS8_Q31__SSE4, exact_divide_by_po2) {
  const auto output_min = std::numeric_limits<int8_t>::min();
  const auto output_max = std::numeric_limits<int8_t>::max();
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
      .qmin(output_min)
      .qmax(output_max)
      .s(shift)
      .TestExactDivideByPO2(xnn_qs8_requantize_q31__sse4);
  }
}
846 
// Runs TestExactDivideByPO2 against the Q31 SSE4 kernel for every zero point
// in the int8_t range combined with every shift amount s in [1, 31].
TEST(QS8_Q31__SSE4, exact_divide_by_po2_with_zero_point) {
  using Int8Limits = std::numeric_limits<int8_t>;

  int32_t zp = Int8Limits::min();
  while (zp <= Int8Limits::max()) {
    for (uint32_t shift = 1; shift < 32; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(Int8Limits::min())
        .qmax(Int8Limits::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_q31__sse4);
    }
    zp++;
  }
}
862 
// Runs TestDivideByPO2WithRoundingUp against the Q31 SSE4 kernel for each
// int8_t zero point and each shift amount s in [1, 31].
TEST(QS8_Q31__SSE4, divide_by_po2_with_rounding_up) {
  const int32_t zp_first = std::numeric_limits<int8_t>::min();
  const int32_t zp_last = std::numeric_limits<int8_t>::max();
  for (int32_t zp = zp_first; zp <= zp_last; ++zp) {
    for (uint32_t shift = 1; shift != 32; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__sse4);
    }
  }
}
878 
879   /* No rounding down test - it fails because of upward bias in multiplication */
880   /* No rounding away test - it fails because of upward bias in multiplication */
881 
// Runs TestSpecialCases against the Q31 SSE4 kernel with the output range
// set to the int8_t limits.
TEST(QS8_Q31__SSE4, special_cases) {
  using Int8Limits = std::numeric_limits<int8_t>;
  RequantizationTester()
    .qmin(Int8Limits::min())
    .qmax(Int8Limits::max())
    .TestSpecialCases(xnn_qs8_requantize_q31__sse4);
}
888 
// Runs TestRandomCasesApproximate against the Q31 SSE4 kernel, configured
// with 100 iterations and the int8_t output range.
TEST(QS8_Q31__SSE4, random_cases) {
  const auto output_min = std::numeric_limits<int8_t>::min();
  const auto output_max = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(output_min)
    .qmax(output_max)
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qs8_requantize_q31__sse4);
}
896 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
897 
898 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
899   /*
900    * Precise ARM NEON implementation.
901    */
902 
// Runs TestExactDivideByPO2 against the precise NEON kernel once per shift
// amount s in [1, 31], using the int8_t output range.
TEST(QS8_PRECISE__NEON, exact_divide_by_po2) {
  using Int8Limits = std::numeric_limits<int8_t>;

  uint32_t shift = 1;
  while (shift < 32) {
    RequantizationTester()
      .s(shift)
      .qmin(Int8Limits::min())
      .qmax(Int8Limits::max())
      .TestExactDivideByPO2(xnn_qs8_requantize_precise__neon);
    shift++;
  }
}
912 
// Runs TestExactDivideByPO2 against the precise NEON kernel for every zero
// point in the int8_t range combined with every shift amount s in [1, 31].
TEST(QS8_PRECISE__NEON, exact_divide_by_po2_with_zero_point) {
  const int32_t zp_first = std::numeric_limits<int8_t>::min();
  const int32_t zp_last = std::numeric_limits<int8_t>::max();
  for (int32_t zp = zp_first; zp <= zp_last; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_precise__neon);
    }
  }
}
928 
// Runs TestDivideByPO2WithRoundingUp against the precise NEON kernel for each
// int8_t zero point and each shift amount s in [1, 31].
TEST(QS8_PRECISE__NEON, divide_by_po2_with_rounding_up) {
  using Int8Limits = std::numeric_limits<int8_t>;

  for (int32_t zp = Int8Limits::min(); zp <= Int8Limits::max(); ++zp) {
    uint32_t shift = 1;
    while (shift < 32) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(Int8Limits::min())
        .qmax(Int8Limits::max())
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_precise__neon);
      shift++;
    }
  }
}
944 
// Runs TestDivideByPO2WithRoundingDown against the precise NEON kernel for
// each int8_t zero point and each shift amount s in [1, 31].
TEST(QS8_PRECISE__NEON, divide_by_po2_with_rounding_down) {
  const auto output_min = std::numeric_limits<int8_t>::min();
  const auto output_max = std::numeric_limits<int8_t>::max();

  int32_t zp = std::numeric_limits<int8_t>::min();
  while (zp <= std::numeric_limits<int8_t>::max()) {
    for (uint32_t shift = 1; shift != 32; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(output_min)
        .qmax(output_max)
        .s(shift)
        .TestDivideByPO2WithRoundingDown(xnn_qs8_requantize_precise__neon);
    }
    zp++;
  }
}
960 
// Runs TestDivideByPO2WithRoundingAway against the precise NEON kernel for
// each int8_t zero point and each shift amount s in [1, 31].
TEST(QS8_PRECISE__NEON, divide_by_po2_with_rounding_away) {
  const int32_t zp_first = std::numeric_limits<int8_t>::min();
  const int32_t zp_last = std::numeric_limits<int8_t>::max();
  for (int32_t zp = zp_first; zp <= zp_last; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(shift)
        .TestDivideByPO2WithRoundingAway(xnn_qs8_requantize_precise__neon);
    }
  }
}
976 
// Runs TestSpecialCases against the precise NEON kernel with the output range
// set to the int8_t limits.
TEST(QS8_PRECISE__NEON, special_cases) {
  const auto output_min = std::numeric_limits<int8_t>::min();
  const auto output_max = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(output_min)
    .qmax(output_max)
    .TestSpecialCases(xnn_qs8_requantize_precise__neon);
}
983 
// Runs TestRandomCasesPrecise against the precise NEON kernel, configured
// with 100 iterations and the int8_t output range.
TEST(QS8_PRECISE__NEON, random_cases) {
  using Int8Limits = std::numeric_limits<int8_t>;
  RequantizationTester()
    .qmin(Int8Limits::min())
    .qmax(Int8Limits::max())
    .iterations(100)
    .TestRandomCasesPrecise(xnn_qs8_requantize_precise__neon);
}
991 
992 
993   /*
994    * FP32-based ARM NEON implementation.
995    */
996 
// Runs TestRandomCasesApproximate against the FP32 NEON kernel, configured
// with 1000 iterations and the int8_t output range.
TEST(QS8_FP32__NEON, random_cases) {
  const auto output_min = std::numeric_limits<int8_t>::min();
  const auto output_max = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(output_min)
    .qmax(output_max)
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__neon);
}
1004 
1005 
1006   /*
1007    * Q31-based ARM NEON implementation.
1008    */
1009 
// Runs TestExactDivideByPO2 against the Q31 NEON kernel once per shift
// amount s in [1, 31], using the int8_t output range.
TEST(QS8_Q31__NEON, exact_divide_by_po2) {
  using Int8Limits = std::numeric_limits<int8_t>;
  for (uint32_t shift = 1; shift != 32; ++shift) {
    RequantizationTester()
      .qmin(Int8Limits::min())
      .qmax(Int8Limits::max())
      .s(shift)
      .TestExactDivideByPO2(xnn_qs8_requantize_q31__neon);
  }
}
1019 
// Runs TestExactDivideByPO2 against the Q31 NEON kernel for every zero point
// in the int8_t range combined with every shift amount s in [1, 31].
TEST(QS8_Q31__NEON, exact_divide_by_po2_with_zero_point) {
  const auto output_min = std::numeric_limits<int8_t>::min();
  const auto output_max = std::numeric_limits<int8_t>::max();

  int32_t zp = std::numeric_limits<int8_t>::min();
  while (zp <= std::numeric_limits<int8_t>::max()) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(output_min)
        .qmax(output_max)
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_q31__neon);
    }
    zp++;
  }
}
1035 
// Runs TestDivideByPO2WithRoundingUp against the Q31 NEON kernel for each
// int8_t zero point and each shift amount s in [1, 31].
TEST(QS8_Q31__NEON, divide_by_po2_with_rounding_up) {
  const int32_t zp_first = std::numeric_limits<int8_t>::min();
  const int32_t zp_last = std::numeric_limits<int8_t>::max();
  for (int32_t zp = zp_first; zp <= zp_last; ++zp) {
    uint32_t shift = 1;
    while (shift < 32) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__neon);
      shift++;
    }
  }
}
1051 
1052   /* No rounding down test - it fails because of upward bias in multiplication */
1053   /* No rounding away test - it fails because of upward bias in multiplication */
1054 
// Runs TestSpecialCases against the Q31 NEON kernel with the output range
// set to the int8_t limits.
TEST(QS8_Q31__NEON, special_cases) {
  using Int8Limits = std::numeric_limits<int8_t>;
  RequantizationTester()
    .qmin(Int8Limits::min())
    .qmax(Int8Limits::max())
    .TestSpecialCases(xnn_qs8_requantize_q31__neon);
}
1061 
// Runs TestRandomCasesApproximate against the Q31 NEON kernel, configured
// with 100 iterations and the int8_t output range.
TEST(QS8_Q31__NEON, random_cases) {
  const auto output_min = std::numeric_limits<int8_t>::min();
  const auto output_max = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(output_min)
    .qmax(output_max)
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qs8_requantize_q31__neon);
}
1069 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1070 
1071 #if XNN_ARCH_WASMSIMD
1072   /*
1073    * FP32-based WAsm SIMD implementation.
1074    */
1075 
// Runs TestRandomCasesApproximate against the FP32 WAsm SIMD kernel,
// configured with 1000 iterations and the int8_t output range.
TEST(QS8_FP32__WASMSIMD, random_cases) {
  using Int8Limits = std::numeric_limits<int8_t>;
  RequantizationTester()
    .qmin(Int8Limits::min())
    .qmax(Int8Limits::max())
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qs8_requantize_fp32__wasmsimd);
}
1083 
1084   /*
1085    * Q31-based WAsm SIMD implementation.
1086    */
1087 
// Runs TestExactDivideByPO2 against the Q31 WAsm SIMD kernel once per shift
// amount s in [1, 31], using the int8_t output range.
TEST(QS8_Q31__WASMSIMD, exact_divide_by_po2) {
  const auto output_min = std::numeric_limits<int8_t>::min();
  const auto output_max = std::numeric_limits<int8_t>::max();

  uint32_t shift = 1;
  while (shift < 32) {
    RequantizationTester()
      .qmin(output_min)
      .qmax(output_max)
      .s(shift)
      .TestExactDivideByPO2(xnn_qs8_requantize_q31__wasmsimd);
    shift++;
  }
}
1097 
// Runs TestExactDivideByPO2 against the Q31 WAsm SIMD kernel for every zero
// point in the int8_t range combined with every shift amount s in [1, 31].
TEST(QS8_Q31__WASMSIMD, exact_divide_by_po2_with_zero_point) {
  using Int8Limits = std::numeric_limits<int8_t>;

  for (int32_t zp = Int8Limits::min(); zp <= Int8Limits::max(); ++zp) {
    for (uint32_t shift = 1; shift != 32; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(Int8Limits::min())
        .qmax(Int8Limits::max())
        .s(shift)
        .TestExactDivideByPO2(xnn_qs8_requantize_q31__wasmsimd);
    }
  }
}
1113 
// Runs TestDivideByPO2WithRoundingUp against the Q31 WAsm SIMD kernel for
// each int8_t zero point and each shift amount s in [1, 31].
TEST(QS8_Q31__WASMSIMD, divide_by_po2_with_rounding_up) {
  const int32_t zp_first = std::numeric_limits<int8_t>::min();
  const int32_t zp_last = std::numeric_limits<int8_t>::max();

  int32_t zp = zp_first;
  while (zp <= zp_last) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(std::numeric_limits<int8_t>::min())
        .qmax(std::numeric_limits<int8_t>::max())
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qs8_requantize_q31__wasmsimd);
    }
    zp++;
  }
}
1129 
1130   /* No rounding down test - it fails because of upward bias in multiplication */
1131   /* No rounding away test - it fails because of upward bias in multiplication */
1132 
// Runs TestSpecialCases against the Q31 WAsm SIMD kernel with the output
// range set to the int8_t limits.
TEST(QS8_Q31__WASMSIMD, special_cases) {
  const auto output_min = std::numeric_limits<int8_t>::min();
  const auto output_max = std::numeric_limits<int8_t>::max();
  RequantizationTester()
    .qmin(output_min)
    .qmax(output_max)
    .TestSpecialCases(xnn_qs8_requantize_q31__wasmsimd);
}
1139 
// Runs TestRandomCasesApproximate against the Q31 WAsm SIMD kernel,
// configured with 100 iterations and the int8_t output range.
TEST(QS8_Q31__WASMSIMD, random_cases) {
  using Int8Limits = std::numeric_limits<int8_t>;
  RequantizationTester()
    .qmin(Int8Limits::min())
    .qmax(Int8Limits::max())
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qs8_requantize_q31__wasmsimd);
}
1147 #endif  // XNN_ARCH_WASMSIMD
1148