// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8 
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>

#include <gtest/gtest.h>

#include <xnnpack/common.h>

#include <xnnpack/requantization-stubs.h>
#include "requantization-tester.h"
19 
20 
/*
 * Precise scalar implementation using unsigned 32-bit arithmetic.
 */
24 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_unsigned32);
  }
}
34 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] and every s in
// [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2_with_zero_point) {
  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_unsigned32);
    }
  }
}
47 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_up) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__scalar_unsigned32);
    }
  }
}
60 
// Runs TestDivideByPO2WithRoundingDown for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_down) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__scalar_unsigned32);
    }
  }
}
73 
// Runs TestDivideByPO2WithRoundingAway for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_away) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__scalar_unsigned32);
    }
  }
}
86 
// Runs TestSpecialCases with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, special_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .TestSpecialCases(xnn_qu8_requantize_precise__scalar_unsigned32);
}
93 
// Runs TestRandomCasesPrecise with zero point 128, 100 iterations, and
// qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED32, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .zero_point(128)
    .iterations(100)
    .TestRandomCasesPrecise(xnn_qu8_requantize_precise__scalar_unsigned32);
}
102 
103 
/*
 * Precise scalar implementation using unsigned 64-bit arithmetic.
 */
107 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_unsigned64);
  }
}
117 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] and every s in
// [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2_with_zero_point) {
  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_unsigned64);
    }
  }
}
130 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_up) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__scalar_unsigned64);
    }
  }
}
143 
// Runs TestDivideByPO2WithRoundingDown for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_down) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__scalar_unsigned64);
    }
  }
}
156 
// Runs TestDivideByPO2WithRoundingAway for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_away) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__scalar_unsigned64);
    }
  }
}
169 
// Runs TestSpecialCases with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, special_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .TestSpecialCases(xnn_qu8_requantize_precise__scalar_unsigned64);
}
176 
// Runs TestRandomCasesPrecise with zero point 128, 100 iterations, and
// qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_UNSIGNED64, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .zero_point(128)
    .iterations(100)
    .TestRandomCasesPrecise(xnn_qu8_requantize_precise__scalar_unsigned64);
}
185 
186 
/*
 * Precise scalar implementation using signed 64-bit arithmetic.
 */
190 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_PRECISE__SCALAR_SIGNED64, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_signed64);
  }
}
200 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] and every s in
// [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, exact_divide_by_po2_with_zero_point) {
  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__scalar_signed64);
    }
  }
}
213 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_up) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__scalar_signed64);
    }
  }
}
226 
// Runs TestDivideByPO2WithRoundingDown for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_down) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__scalar_signed64);
    }
  }
}
239 
// Runs TestDivideByPO2WithRoundingAway for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_away) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__scalar_signed64);
    }
  }
}
252 
// Runs TestSpecialCases with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, special_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .TestSpecialCases(xnn_qu8_requantize_precise__scalar_signed64);
}
259 
// Runs TestRandomCasesPrecise with zero point 128, 100 iterations, and
// qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SCALAR_SIGNED64, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .zero_point(128)
    .iterations(100)
    .TestRandomCasesPrecise(xnn_qu8_requantize_precise__scalar_signed64);
}
268 
269 
/*
 * FP32-based scalar implementation using lrintf function.
 */
273 
// Runs TestRandomCasesApproximate for 1000 iterations with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_FP32__SCALAR_LRINTF, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__scalar_lrintf);
}
281 
282 
/*
 * FP32-based scalar implementation using magic trick for FP32->INT32 conversion.
 */
286 
// Runs TestRandomCasesApproximate for 1000 iterations with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_FP32__SCALAR_MAGIC, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__scalar_magic);
}
294 
295 
/*
 * Q31-based scalar implementation.
 */
299 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_Q31__SCALAR, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_q31__scalar);
  }
}
309 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] and every s in
// [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SCALAR, exact_divide_by_po2_with_zero_point) {
  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__scalar);
    }
  }
}
322 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SCALAR, divide_by_po2_with_rounding_up) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__scalar);
    }
  }
}
335 
/* No rounding down test - it fails because of upward bias in multiplication */
/* No rounding away test - it fails because of upward bias in multiplication */
338 
// Runs TestSpecialCases with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SCALAR, special_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .TestSpecialCases(xnn_qu8_requantize_q31__scalar);
}
345 
// Runs TestRandomCasesApproximate for 100 iterations with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_Q31__SCALAR, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qu8_requantize_q31__scalar);
}
353 
354 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
/*
 * Precise SSE2 implementation using floating-point shuffle.
 */
359 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_PRECISE__SSE2, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_precise__sse2);
  }
}
369 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] and every s in
// [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE2, exact_divide_by_po2_with_zero_point) {
  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__sse2);
    }
  }
}
382 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE2, divide_by_po2_with_rounding_up) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__sse2);
    }
  }
}
395 
// Runs TestDivideByPO2WithRoundingDown for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE2, divide_by_po2_with_rounding_down) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__sse2);
    }
  }
}
408 
// Runs TestDivideByPO2WithRoundingAway for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE2, divide_by_po2_with_rounding_away) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__sse2);
    }
  }
}
421 
// Runs TestSpecialCases with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE2, special_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .TestSpecialCases(xnn_qu8_requantize_precise__sse2);
}
428 
// Runs TestRandomCasesPrecise with zero point 128, 100 iterations, and
// qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE2, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .zero_point(128)
    .iterations(100)
    .TestRandomCasesPrecise(xnn_qu8_requantize_precise__sse2);
}
437 
438 
/*
 * Precise SSSE3 implementation using floating-point shuffle.
 */
442 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_PRECISE__SSSE3, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_precise__ssse3);
  }
}
452 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] and every s in
// [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, exact_divide_by_po2_with_zero_point) {
  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__ssse3);
    }
  }
}
465 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, divide_by_po2_with_rounding_up) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__ssse3);
    }
  }
}
478 
// Runs TestDivideByPO2WithRoundingDown for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, divide_by_po2_with_rounding_down) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__ssse3);
    }
  }
}
491 
// Runs TestDivideByPO2WithRoundingAway for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, divide_by_po2_with_rounding_away) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__ssse3);
    }
  }
}
504 
// Runs TestSpecialCases with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, special_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .TestSpecialCases(xnn_qu8_requantize_precise__ssse3);
}
511 
// Runs TestRandomCasesPrecise with zero point 128, 100 iterations, and
// qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSSE3, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .zero_point(128)
    .iterations(100)
    .TestRandomCasesPrecise(xnn_qu8_requantize_precise__ssse3);
}
520 
521 
/*
 * Precise SSE4.1 implementation using static blend instruction.
 */
525 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_PRECISE__SSE4, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_precise__sse4);
  }
}
535 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] and every s in
// [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE4, exact_divide_by_po2_with_zero_point) {
  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__sse4);
    }
  }
}
548 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE4, divide_by_po2_with_rounding_up) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__sse4);
    }
  }
}
561 
// Runs TestDivideByPO2WithRoundingDown for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE4, divide_by_po2_with_rounding_down) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__sse4);
    }
  }
}
574 
// Runs TestDivideByPO2WithRoundingAway for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE4, divide_by_po2_with_rounding_away) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__sse4);
    }
  }
}
587 
// Runs TestSpecialCases with qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE4, special_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .TestSpecialCases(xnn_qu8_requantize_precise__sse4);
}
594 
// Runs TestRandomCasesPrecise with zero point 128, 100 iterations, and
// qmin/qmax at the uint8_t limits.
TEST(QU8_PRECISE__SSE4, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .zero_point(128)
    .iterations(100)
    .TestRandomCasesPrecise(xnn_qu8_requantize_precise__sse4);
}
603 
604 
/*
 * FP32-based x86 SSE2 implementation.
 */
608 
// Runs TestRandomCasesApproximate for 1000 iterations with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_FP32__SSE2, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__sse2);
}
616 
617 
/*
 * Q31-based x86 SSE2 implementation.
 */
621 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_Q31__SSE2, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_q31__sse2);
  }
}
631 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] and every s in
// [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SSE2, exact_divide_by_po2_with_zero_point) {
  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__sse2);
    }
  }
}
644 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SSE2, divide_by_po2_with_rounding_up) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__sse2);
    }
  }
}
657 
/* No rounding down test - it fails because of upward bias in multiplication */
/* No rounding away test - it fails because of upward bias in multiplication */
660 
// Runs TestSpecialCases with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SSE2, special_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .TestSpecialCases(xnn_qu8_requantize_q31__sse2);
}
667 
// Runs TestRandomCasesApproximate for 100 iterations with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_Q31__SSE2, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qu8_requantize_q31__sse2);
}
675 
676 
/*
 * Q31-based x86 SSSE3 implementation.
 */
680 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_Q31__SSSE3, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_q31__ssse3);
  }
}
690 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] and every s in
// [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SSSE3, exact_divide_by_po2_with_zero_point) {
  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__ssse3);
    }
  }
}
703 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SSSE3, divide_by_po2_with_rounding_up) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__ssse3);
    }
  }
}
716 
/* No rounding down test - it fails because of upward bias in multiplication */
/* No rounding away test - it fails because of upward bias in multiplication */
719 
// Runs TestSpecialCases with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SSSE3, special_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .TestSpecialCases(xnn_qu8_requantize_q31__ssse3);
}
726 
// Runs TestRandomCasesApproximate for 100 iterations with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_Q31__SSSE3, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qu8_requantize_q31__ssse3);
}
734 
735 
/*
 * Q31-based x86 SSE4 implementation.
 */
739 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_Q31__SSE4, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_q31__sse4);
  }
}
749 
// Runs TestExactDivideByPO2 for every zero point in [1, 255] and every s in
// [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SSE4, exact_divide_by_po2_with_zero_point) {
  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__sse4);
    }
  }
}
762 
// Runs TestDivideByPO2WithRoundingUp for every zero point in [0, 255] and
// every s in [1, 31], with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SSE4, divide_by_po2_with_rounding_up) {
  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
    for (uint32_t s = 1; s < 32; s++) {
      RequantizationTester()
        .zero_point(zero_point)
        .qmin(std::numeric_limits<uint8_t>::min())
        .qmax(std::numeric_limits<uint8_t>::max())
        .s(s)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__sse4);
    }
  }
}
775 
/* No rounding down test - it fails because of upward bias in multiplication */
/* No rounding away test - it fails because of upward bias in multiplication */
778 
// Runs TestSpecialCases with qmin/qmax at the uint8_t limits.
TEST(QU8_Q31__SSE4, special_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .TestSpecialCases(xnn_qu8_requantize_q31__sse4);
}
785 
// Runs TestRandomCasesApproximate for 100 iterations with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_Q31__SSE4, random_cases) {
  RequantizationTester()
    .qmin(std::numeric_limits<uint8_t>::min())
    .qmax(std::numeric_limits<uint8_t>::max())
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qu8_requantize_q31__sse4);
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
/*
 * Precise ARM NEON implementation.
 */
799 
// Runs TestExactDivideByPO2 for every s in [1, 31], with qmin/qmax at the
// uint8_t limits and the zero point left at the tester's default.
TEST(QU8_PRECISE__NEON, exact_divide_by_po2) {
  for (uint32_t s = 1; s < 32; s++) {
    RequantizationTester()
      .qmin(std::numeric_limits<uint8_t>::min())
      .qmax(std::numeric_limits<uint8_t>::max())
      .s(s)
      .TestExactDivideByPO2(xnn_qu8_requantize_precise__neon);
  }
}
809 
TEST(QU8_PRECISE__NEON, exact_divide_by_po2_with_zero_point) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Zero points start at 1 here; each is paired with every shift in [1, 31].
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(kOutputMin)
        .qmax(kOutputMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_precise__neon);
    }
  }
}
822 
TEST(QU8_PRECISE__NEON, divide_by_po2_with_rounding_up) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Every zero point in [0, 255] is paired with every shift in [1, 31].
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(kOutputMin)
        .qmax(kOutputMax)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_precise__neon);
    }
  }
}
835 
TEST(QU8_PRECISE__NEON, divide_by_po2_with_rounding_down) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Every zero point in [0, 255] is paired with every shift in [1, 31].
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(kOutputMin)
        .qmax(kOutputMax)
        .s(shift)
        .TestDivideByPO2WithRoundingDown(xnn_qu8_requantize_precise__neon);
    }
  }
}
848 
TEST(QU8_PRECISE__NEON, divide_by_po2_with_rounding_away) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Every zero point in [0, 255] is paired with every shift in [1, 31].
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(kOutputMin)
        .qmax(kOutputMax)
        .s(shift)
        .TestDivideByPO2WithRoundingAway(xnn_qu8_requantize_precise__neon);
    }
  }
}
861 
TEST(QU8_PRECISE__NEON, special_cases) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Hand the precise NEON kernel to the tester's special-case check.
  RequantizationTester()
    .qmin(kOutputMin)
    .qmax(kOutputMax)
    .TestSpecialCases(xnn_qu8_requantize_precise__neon);
}
868 
TEST(QU8_PRECISE__NEON, random_cases) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Precise randomized check with a fixed zero point of 128, 100 iterations.
  RequantizationTester()
    .qmin(kOutputMin)
    .qmax(kOutputMax)
    .zero_point(128)
    .iterations(100)
    .TestRandomCasesPrecise(xnn_qu8_requantize_precise__neon);
}
877 
878 
879   /*
880    * FP32-based ARM NEON implementation.
881    */
882 
TEST(QU8_FP32__NEON, random_cases) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Approximate randomized check of the FP32 NEON kernel, 1000 iterations.
  RequantizationTester()
    .qmin(kOutputMin)
    .qmax(kOutputMax)
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__neon);
}
890 
891 
892   /*
893    * Q31-based ARM NEON implementation.
894    */
895 
TEST(QU8_Q31__NEON, exact_divide_by_po2) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Sweep the shift amount over [1, 31]; the zero point is left at the tester's default.
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
      .qmin(kOutputMin)
      .qmax(kOutputMax)
      .s(shift)
      .TestExactDivideByPO2(xnn_qu8_requantize_q31__neon);
  }
}
905 
TEST(QU8_Q31__NEON, exact_divide_by_po2_with_zero_point) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Zero points start at 1 here; each is paired with every shift in [1, 31].
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(kOutputMin)
        .qmax(kOutputMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__neon);
    }
  }
}
918 
TEST(QU8_Q31__NEON, divide_by_po2_with_rounding_up) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Every zero point in [0, 255] is paired with every shift in [1, 31].
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(kOutputMin)
        .qmax(kOutputMax)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__neon);
    }
  }
}
931 
932   /* No rounding down test - it fails because of upward bias in multiplication */
933   /* No rounding away test - it fails because of upward bias in multiplication */
934 
TEST(QU8_Q31__NEON, special_cases) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Hand the NEON Q31 kernel to the tester's special-case check.
  RequantizationTester()
    .qmin(kOutputMin)
    .qmax(kOutputMax)
    .TestSpecialCases(xnn_qu8_requantize_q31__neon);
}
941 
TEST(QU8_Q31__NEON, random_cases) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Approximate randomized check of the NEON Q31 kernel, 100 iterations.
  RequantizationTester()
    .qmin(kOutputMin)
    .qmax(kOutputMax)
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qu8_requantize_q31__neon);
}
949 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
950 
951 #if XNN_ARCH_WASMSIMD
/*
 * FP32-based WAsm SIMD implementation.
 */
955 
TEST(QU8_FP32__WASMSIMD, random_cases) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Approximate randomized check of the FP32 WAsm SIMD kernel, 1000 iterations.
  RequantizationTester()
    .qmin(kOutputMin)
    .qmax(kOutputMax)
    .iterations(1000)
    .TestRandomCasesApproximate(xnn_qu8_requantize_fp32__wasmsimd);
}
963 
964 
/*
 * Q31-based WAsm SIMD implementation.
 */
968 
TEST(QU8_Q31__WASMSIMD, exact_divide_by_po2) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Sweep the shift amount over [1, 31]; the zero point is left at the tester's default.
  for (uint32_t shift = 1; shift <= 31; ++shift) {
    RequantizationTester()
      .qmin(kOutputMin)
      .qmax(kOutputMax)
      .s(shift)
      .TestExactDivideByPO2(xnn_qu8_requantize_q31__wasmsimd);
  }
}
978 
TEST(QU8_Q31__WASMSIMD, exact_divide_by_po2_with_zero_point) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Zero points start at 1 here; each is paired with every shift in [1, 31].
  for (int32_t zp = 1; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(kOutputMin)
        .qmax(kOutputMax)
        .s(shift)
        .TestExactDivideByPO2(xnn_qu8_requantize_q31__wasmsimd);
    }
  }
}
991 
TEST(QU8_Q31__WASMSIMD, divide_by_po2_with_rounding_up) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Every zero point in [0, 255] is paired with every shift in [1, 31].
  for (int32_t zp = 0; zp <= 255; ++zp) {
    for (uint32_t shift = 1; shift <= 31; ++shift) {
      RequantizationTester()
        .zero_point(zp)
        .qmin(kOutputMin)
        .qmax(kOutputMax)
        .s(shift)
        .TestDivideByPO2WithRoundingUp(xnn_qu8_requantize_q31__wasmsimd);
    }
  }
}
1004 
1005   /* No rounding down test - it fails because of upward bias in multiplication */
1006   /* No rounding away test - it fails because of upward bias in multiplication */
1007 
TEST(QU8_Q31__WASMSIMD, special_cases) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Hand the WAsm SIMD Q31 kernel to the tester's special-case check.
  RequantizationTester()
    .qmin(kOutputMin)
    .qmax(kOutputMax)
    .TestSpecialCases(xnn_qu8_requantize_q31__wasmsimd);
}
1014 
TEST(QU8_Q31__WASMSIMD, random_cases) {
  // Output bounds span the whole uint8_t range.
  constexpr auto kOutputMin = std::numeric_limits<uint8_t>::min();
  constexpr auto kOutputMax = std::numeric_limits<uint8_t>::max();

  // Approximate randomized check of the WAsm SIMD Q31 kernel, 100 iterations.
  RequantizationTester()
    .qmin(kOutputMin)
    .qmax(kOutputMax)
    .iterations(100)
    .TestRandomCasesApproximate(xnn_qu8_requantize_q31__wasmsimd);
}
1022 #endif  // XNN_ARCH_WASMSIMD
1023