• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qs8-vmul-minmax-fp32.yaml
//   Generator: tools/generate-vbinary-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/microparams-init.h>
17 #include <xnnpack/vmul.h>
18 #include "vmul-microkernel-tester.h"
19 
20 
21 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the neon_ld64_x8 kernel through VMulMicrokernelTester with a batch size of exactly 8.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  VMulMicrokernelTester()
    .batch_size(8)
    .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
28 
// Batch sizes 16, 24, ..., 72: every multiple of 8 from 16 up to (not including) 80.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
37 
// Batch sizes 1 through 7, i.e. every size below 8.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
46 
// Batch sizes 9 through 15, i.e. every size strictly between 8 and 16.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
55 
// Sets inplace_a(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
65 
// Sets inplace_b(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
75 
// Sets both inplace_a(true) and inplace_b(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
86 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
98 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
110 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
122 
// Sweeps a_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
134 
// Sweeps b_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
146 
// Sweeps y_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
158 
// Sets qmin(128) on the tester for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
168 
// Sets qmax(128) on the tester for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
178 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
179 
180 
181 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the neon_ld64_x16 kernel through VMulMicrokernelTester with a batch size of exactly 16.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VMulMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
188 
// Batch sizes 32, 48, ..., 144: every multiple of 16 from 32 up to (not including) 160.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
197 
// Batch sizes 1 through 15, i.e. every size below 16.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
206 
// Batch sizes 17 through 31, i.e. every size strictly between 16 and 32.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
215 
// Sets inplace_a(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
225 
// Sets inplace_b(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
235 
// Sets both inplace_a(true) and inplace_b(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
246 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
258 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
270 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
282 
// Sweeps a_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
294 
// Sweeps b_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
306 
// Sweeps y_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
318 
// Sets qmin(128) on the tester for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
328 
// Sets qmax(128) on the tester for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
338 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
339 
340 
341 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the neon_ld128_x16 kernel through VMulMicrokernelTester with a batch size of exactly 16.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VMulMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
348 
// Batch sizes 32, 48, ..., 144: every multiple of 16 from 32 up to (not including) 160.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
357 
// Batch sizes 1 through 15, i.e. every size below 16.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
366 
// Batch sizes 17 through 31, i.e. every size strictly between 16 and 32.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
375 
// Sets inplace_a(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
385 
// Sets inplace_b(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
395 
// Sets both inplace_a(true) and inplace_b(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
406 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
418 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
430 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
442 
// Sweeps a_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
454 
// Sweeps b_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
466 
// Sweeps y_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
478 
// Sets qmin(128) on the tester for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
488 
// Sets qmax(128) on the tester for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
498 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
499 
500 
501 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the neonv8_ld64_x8 kernel through VMulMicrokernelTester with a batch size of exactly 8.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  VMulMicrokernelTester()
    .batch_size(8)
    .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
508 
// Batch sizes 16, 24, ..., 72: every multiple of 8 from 16 up to (not including) 80.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
517 
// Batch sizes 1 through 7, i.e. every size below 8.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
526 
// Batch sizes 9 through 15, i.e. every size strictly between 8 and 16.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
535 
// Sets inplace_a(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, inplace_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
545 
// Sets inplace_b(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, inplace_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
555 
// Sets both inplace_a(true) and inplace_b(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
566 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
578 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
590 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
602 
// Sweeps a_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
614 
// Sweeps b_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
626 
// Sweeps y_scale geometrically (start 0.1f, multiply by 3.14f while <= 10.0f) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
638 
// Sets qmin(128) on the tester for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
648 
// Sets qmax(128) on the tester for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
658 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
659 
660 
661 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the neonv8_ld64_x16 kernel through VMulMicrokernelTester with a batch size of exactly 16.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  VMulMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
668 
// Batch sizes 32, 48, ..., 144: every multiple of 16 from 32 up to (not including) 160.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
677 
// Covers every batch size from 1 through 15.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n < 16; ++n) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
686 
// Covers every batch size from 17 through 31.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 17; n < 32; ++n) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
695 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with inplace_a(true).
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
705 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with inplace_b(true).
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_b(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
715 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with both inplace_a(true) and inplace_b(true).
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
726 
// For batch sizes 1, 16, ..., 76, steps a_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
738 
// For batch sizes 1, 16, ..., 76, steps b_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
750 
// For batch sizes 1, 16, ..., 76, steps y_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
762 
// For batch sizes 1, 16, ..., 76, grows a_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
774 
// For batch sizes 1, 16, ..., 76, grows b_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
786 
// For batch sizes 1, 16, ..., 76, grows y_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
798 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with qmin(128) set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
808 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with qmax(128) set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
818 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
819 
820 
821 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with batch_size fixed at 16.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  VMulMicrokernelTester().batch_size(16).Test(
    xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
    xnn_init_qs8_mul_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
828 
// Walks batch sizes 32, 48, ..., 144 (multiples of 16 below 160).
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 32; n < 160; n += 16) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
837 
// Covers every batch size from 1 through 15.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n < 16; ++n) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
846 
// Covers every batch size from 17 through 31.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 17; n < 32; ++n) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
855 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with inplace_a(true).
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
865 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with inplace_b(true).
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_b(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
875 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with both inplace_a(true) and inplace_b(true).
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
886 
// For batch sizes 1, 16, ..., 76, steps a_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
898 
// For batch sizes 1, 16, ..., 76, steps b_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
910 
// For batch sizes 1, 16, ..., 76, steps y_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
922 
// For batch sizes 1, 16, ..., 76, grows a_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
934 
// For batch sizes 1, 16, ..., 76, grows b_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
946 
// For batch sizes 1, 16, ..., 76, grows y_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
958 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with qmin(128) set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
968 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with qmax(128) set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
978 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
979 
980 
981 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 8.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  VMulMicrokernelTester().batch_size(8).Test(
    xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
    xnn_init_qs8_mul_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
988 
// Walks batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 16; n < 80; n += 8) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
997 
// Covers every batch size from 1 through 7.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 8; ++n) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1006 
// Covers every batch size from 9 through 15.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 9; n < 16; ++n) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1015 
// Sweeps batch sizes 1, 8, 15, 22, 29, 36 with inplace_a(true).
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1025 
// Sweeps batch sizes 1, 8, 15, 22, 29, 36 with inplace_b(true).
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).inplace_b(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1035 
// Sweeps batch sizes 1, 8, 15, 22, 29, 36 with both inplace_a(true) and inplace_b(true).
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1046 
// For batch sizes 1, 8, ..., 36, steps a_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1058 
// For batch sizes 1, 8, ..., 36, steps b_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1070 
// For batch sizes 1, 8, ..., 36, steps y_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1082 
// For batch sizes 1, 8, ..., 36, grows a_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1094 
// For batch sizes 1, 8, ..., 36, grows b_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1106 
// For batch sizes 1, 8, ..., 36, grows y_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1118 
// Sweeps batch sizes 1, 8, 15, 22, 29, 36 with qmin(128) set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1128 
// Sweeps batch sizes 1, 8, 15, 22, 29, 36 with qmax(128) set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1138 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1139 
1140 
1141 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 16.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  VMulMicrokernelTester().batch_size(16).Test(
    xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
    xnn_init_qs8_mul_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
1148 
// Walks batch sizes 32, 48, ..., 144 (multiples of 16 below 160).
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 32; n < 160; n += 16) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1157 
// Covers every batch size from 1 through 15.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 16; ++n) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1166 
// Covers every batch size from 17 through 31.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 17; n < 32; ++n) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1175 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with inplace_a(true).
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1185 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with inplace_b(true).
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_b(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1195 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with both inplace_a(true) and inplace_b(true).
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1206 
// For batch sizes 1, 16, ..., 76, steps a_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1218 
// For batch sizes 1, 16, ..., 76, steps b_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1230 
// For batch sizes 1, 16, ..., 76, steps y_zero_point through -128, -77, -26, 25, 76, 127.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1242 
// For batch sizes 1, 16, ..., 76, grows a_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1254 
// For batch sizes 1, 16, ..., 76, grows b_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1266 
// For batch sizes 1, 16, ..., 76, grows y_scale from 0.1 by a factor of 3.14 while it stays <= 10.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
1278 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with qmin(128) set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1288 
// Sweeps batch sizes 1, 16, 31, 46, 61, 76 with qmax(128) set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
1298 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1299 
1300 
1301 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with batch_size fixed at 8.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  VMulMicrokernelTester().batch_size(8).Test(
    xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
    xnn_init_qs8_mul_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
1308 
// Walks batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 16; n < 80; n += 8) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
1317 
// Covers every batch size from 1 through 7.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 8; ++n) {
    VMulMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
1326 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,batch_gt_8)1327   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_gt_8) {
1328     TEST_REQUIRES_X86_SSE41;
1329     for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1330       VMulMicrokernelTester()
1331         .batch_size(batch_size)
1332         .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1333     }
1334   }
1335 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,inplace_a)1336   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace_a) {
1337     TEST_REQUIRES_X86_SSE41;
1338     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1339       VMulMicrokernelTester()
1340         .batch_size(batch_size)
1341         .inplace_a(true)
1342         .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1343     }
1344   }
1345 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,inplace_b)1346   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace_b) {
1347     TEST_REQUIRES_X86_SSE41;
1348     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1349       VMulMicrokernelTester()
1350         .batch_size(batch_size)
1351         .inplace_b(true)
1352         .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1353     }
1354   }
1355 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,inplace_a_and_b)1356   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
1357     TEST_REQUIRES_X86_SSE41;
1358     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1359       VMulMicrokernelTester()
1360         .batch_size(batch_size)
1361         .inplace_a(true)
1362         .inplace_b(true)
1363         .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1364     }
1365   }
1366 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,a_zero_point)1367   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, a_zero_point) {
1368     TEST_REQUIRES_X86_SSE41;
1369     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1370       for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
1371         VMulMicrokernelTester()
1372           .batch_size(batch_size)
1373           .a_zero_point(a_zero_point)
1374           .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1375       }
1376     }
1377   }
1378 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,b_zero_point)1379   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, b_zero_point) {
1380     TEST_REQUIRES_X86_SSE41;
1381     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1382       for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
1383         VMulMicrokernelTester()
1384           .batch_size(batch_size)
1385           .b_zero_point(b_zero_point)
1386           .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1387       }
1388     }
1389   }
1390 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,y_zero_point)1391   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, y_zero_point) {
1392     TEST_REQUIRES_X86_SSE41;
1393     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1394       for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
1395         VMulMicrokernelTester()
1396           .batch_size(batch_size)
1397           .y_zero_point(y_zero_point)
1398           .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1399       }
1400     }
1401   }
1402 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,a_scale)1403   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, a_scale) {
1404     TEST_REQUIRES_X86_SSE41;
1405     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1406       for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
1407         VMulMicrokernelTester()
1408           .batch_size(batch_size)
1409           .a_scale(a_scale)
1410           .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1411       }
1412     }
1413   }
1414 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,b_scale)1415   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, b_scale) {
1416     TEST_REQUIRES_X86_SSE41;
1417     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1418       for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
1419         VMulMicrokernelTester()
1420           .batch_size(batch_size)
1421           .b_scale(b_scale)
1422           .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1423       }
1424     }
1425   }
1426 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,y_scale)1427   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, y_scale) {
1428     TEST_REQUIRES_X86_SSE41;
1429     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1430       for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
1431         VMulMicrokernelTester()
1432           .batch_size(batch_size)
1433           .y_scale(y_scale)
1434           .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1435       }
1436     }
1437   }
1438 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,qmin)1439   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, qmin) {
1440     TEST_REQUIRES_X86_SSE41;
1441     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1442       VMulMicrokernelTester()
1443         .batch_size(batch_size)
1444         .qmin(128)
1445         .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1446     }
1447   }
1448 
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8,qmax)1449   TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, qmax) {
1450     TEST_REQUIRES_X86_SSE41;
1451     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1452       VMulMicrokernelTester()
1453         .batch_size(batch_size)
1454         .qmax(128)
1455         .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
1456     }
1457   }
1458 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1459 
1460 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16.
  // Every test invokes TEST_REQUIRES_X86_SSE41 first.
  // NOTE(review): presumably that macro skips the test when the CPU lacks
  // SSE4.1 -- confirm against xnnpack/isa-checks.h.

  // Batch size of exactly 16.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_SSE41;
    VMulMicrokernelTester().batch_size(16).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 32; n < 160; n += 16) {
      VMulMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n < 16; ++n) {
      VMulMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 17; n < 32; ++n) {
      VMulMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // inplace_a(true) for batch sizes 1 to 80 in steps of 15.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace_a) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester().batch_size(n).inplace_a(true).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // inplace_b(true) for batch sizes 1 to 80 in steps of 15.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace_b) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester().batch_size(n).inplace_b(true).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // inplace_a(true) and inplace_b(true) together, batch sizes 1 to 80 in
  // steps of 15.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // Sweeps a_zero_point from -128 to 127 in steps of 51 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VMulMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps b_zero_point from -128 to 127 in steps of 51 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VMulMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps y_zero_point from -128 to 127 in steps of 51 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VMulMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps a_scale geometrically: starts at 0.1f, multiplies by 3.14f while
  // the value stays <= 10.0f.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps b_scale geometrically: starts at 0.1f, multiplies by 3.14f while
  // the value stays <= 10.0f.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps y_scale geometrically: starts at 0.1f, multiplies by 3.14f while
  // the value stays <= 10.0f.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // qmin set to 128 for batch sizes 1 to 80 in steps of 15.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, qmin) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // qmax set to 128 for batch sizes 1 to 80 in steps of 15.
  TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, qmax) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1619 
1620 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8.
  // Every test invokes TEST_REQUIRES_X86_AVX first.
  // NOTE(review): presumably that macro skips the test when the CPU lacks
  // AVX -- confirm against xnnpack/isa-checks.h.
  // The params initializer passed below is the sse4 variant, same as the
  // SSE4.1 suites in this file.

  // Batch size of exactly 8.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_X86_AVX;
    VMulMicrokernelTester().batch_size(8).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_div_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 16; n < 80; n += 8) {
      VMulMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 1 through 7.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n < 8; ++n) {
      VMulMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 9 through 15.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 9; n < 16; ++n) {
      VMulMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // inplace_a(true) for batch sizes 1 to 40 in steps of 7.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace_a) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester().batch_size(n).inplace_a(true).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // inplace_b(true) for batch sizes 1 to 40 in steps of 7.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace_b) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester().batch_size(n).inplace_b(true).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // inplace_a(true) and inplace_b(true) together, batch sizes 1 to 40 in
  // steps of 7.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // Sweeps a_zero_point from -128 to 127 in steps of 51 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, a_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VMulMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps b_zero_point from -128 to 127 in steps of 51 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, b_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VMulMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps y_zero_point from -128 to 127 in steps of 51 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, y_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VMulMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps a_scale geometrically: starts at 0.1f, multiplies by 3.14f while
  // the value stays <= 10.0f.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, a_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps b_scale geometrically: starts at 0.1f, multiplies by 3.14f while
  // the value stays <= 10.0f.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, b_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps y_scale geometrically: starts at 0.1f, multiplies by 3.14f while
  // the value stays <= 10.0f.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, y_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // qmin set to 128 for batch sizes 1 to 40 in steps of 7.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, qmin) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // qmax set to 128 for batch sizes 1 to 40 in steps of 7.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, qmax) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      VMulMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779 
1780 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16.
  // Every test invokes TEST_REQUIRES_X86_AVX first.
  // NOTE(review): presumably that macro skips the test when the CPU lacks
  // AVX -- confirm against xnnpack/isa-checks.h.
  // The params initializer passed below is the sse4 variant, same as the
  // SSE4.1 suites in this file.

  // Batch size of exactly 16.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX;
    VMulMicrokernelTester().batch_size(16).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 32; n < 160; n += 16) {
      VMulMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 1 through 15.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n < 16; ++n) {
      VMulMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // Batch sizes 17 through 31.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 17; n < 32; ++n) {
      VMulMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // inplace_a(true) for batch sizes 1 to 80 in steps of 15.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace_a) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester().batch_size(n).inplace_a(true).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // inplace_b(true) for batch sizes 1 to 80 in steps of 15.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace_b) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester().batch_size(n).inplace_b(true).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // inplace_a(true) and inplace_b(true) together, batch sizes 1 to 80 in
  // steps of 15.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace_a_and_b) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // Sweeps a_zero_point from -128 to 127 in steps of 51 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VMulMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps b_zero_point from -128 to 127 in steps of 51 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VMulMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps y_zero_point from -128 to 127 in steps of 51 for each batch size.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VMulMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps a_scale geometrically: starts at 0.1f, multiplies by 3.14f while
  // the value stays <= 10.0f.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps b_scale geometrically: starts at 0.1f, multiplies by 3.14f while
  // the value stays <= 10.0f.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // Sweeps y_scale geometrically: starts at 0.1f, multiplies by 3.14f while
  // the value stays <= 10.0f.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VMulMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }

  // qmin set to 128 for batch sizes 1 to 80 in steps of 15.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, qmin) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }

  // qmax set to 128 for batch sizes 1 to 80 in steps of 15.
  TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, qmax) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      VMulMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1939 
1940 
1941 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the WAsm SIMD x8 kernel with a batch size of exactly 8.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_eq_8) {
  VMulMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
1947 
// Runs the WAsm SIMD x8 kernel for batch sizes 16, 24, ..., 72 (multiples
// of 8).
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_div_8) {
  for (size_t n = 16; n < 80; n += 8) {
    VMulMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1955 
// Batch sizes below 8: 1 through 7.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_lt_8) {
  for (size_t n = 1; n < 8; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1963 
// Batch sizes above 8 and below 16: 9 through 15.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_gt_8) {
  for (size_t n = 9; n < 16; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1971 
// inplace_a(true) on the tester, for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace_a) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1980 
// inplace_b(true) on the tester, for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace_b) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1989 
// Both inplace_a(true) and inplace_b(true), for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace_a_and_b) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
1999 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, a_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2010 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, b_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2021 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, y_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2032 
// Sweeps a_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, a_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2043 
// Sweeps b_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, b_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2054 
// Sweeps y_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, y_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2065 
// qmin(128) on the tester, for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, qmin) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
2074 
// qmax(128) on the tester, for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, qmax) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
2083 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2084 
2085 
2086 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the WAsm SIMD mul32/ld64 x16 kernel with batch_size 16.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_eq_16) {
  VMulMicrokernelTester()
    .batch_size(16)
    .Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
2092 
// Batch sizes that are multiples of 16: 32, 48, ..., 144.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_div_16) {
  for (size_t n = 32; n < 160; n += 16) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
2100 
// Batch sizes below 16: 1 through 15.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_lt_16) {
  for (size_t n = 1; n < 16; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
2108 
// Batch sizes above 16 and below 32: 17 through 31.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_gt_16) {
  for (size_t n = 17; n < 32; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
2116 
// inplace_a(true) on the tester, for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace_a) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
2125 
// inplace_b(true) on the tester, for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace_b) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
2134 
// Both inplace_a(true) and inplace_b(true), for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace_a_and_b) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
2144 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, a_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2155 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, b_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2166 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, y_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2177 
// Sweeps a_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, a_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2188 
// Sweeps b_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, b_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2199 
// Sweeps y_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, y_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2210 
// qmin(128) on the tester, for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, qmin) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
2219 
// qmax(128) on the tester, for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, qmax) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
2228 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2229 
2230 
// Single run of the scalar x1 kernel with batch_size 1.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, batch_eq_1) {
  VMulMicrokernelTester()
    .batch_size(1)
    .Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
}
2236 
// Batch sizes above 1: 2 through 9.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, batch_gt_1) {
  for (size_t n = 2; n < 10; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2244 
// inplace_a(true) on the tester, for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, inplace_a) {
  for (size_t n = 1; n <= 5; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2253 
// inplace_b(true) on the tester, for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, inplace_b) {
  for (size_t n = 1; n <= 5; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2262 
// Both inplace_a(true) and inplace_b(true), for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, inplace_a_and_b) {
  for (size_t n = 1; n <= 5; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2272 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, a_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2283 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, b_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2294 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, y_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2305 
// Sweeps a_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, a_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2316 
// Sweeps b_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, b_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2327 
// Sweeps y_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, y_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2338 
// qmin(128) on the tester, for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, qmin) {
  for (size_t n = 1; n <= 5; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2347 
// qmax(128) on the tester, for batch sizes 1 through 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, qmax) {
  for (size_t n = 1; n <= 5; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2356 
// Single run of the scalar x2 kernel with batch_size 2.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_eq_2) {
  VMulMicrokernelTester()
    .batch_size(2)
    .Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
}
2362 
// Even batch sizes: 4, 6, ..., 18.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_div_2) {
  for (size_t n = 4; n < 20; n += 2) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2370 
// Batch sizes below 2; the loop bounds admit only batch size 1.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_lt_2) {
  for (size_t n = 1; n < 2; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2378 
// Batch sizes above 2 and below 4; the loop bounds admit only batch size 3.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_gt_2) {
  for (size_t n = 3; n < 4; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2386 
// inplace_a(true) on the tester, for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, inplace_a) {
  for (size_t n = 1; n <= 10; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2395 
// inplace_b(true) on the tester, for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, inplace_b) {
  for (size_t n = 1; n <= 10; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2404 
// Both inplace_a(true) and inplace_b(true), for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, inplace_a_and_b) {
  for (size_t n = 1; n <= 10; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2414 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, a_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2425 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, b_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2436 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, y_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2447 
// Sweeps a_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, a_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2458 
// Sweeps b_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, b_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2469 
// Sweeps y_scale geometrically (start 0.1, ratio 3.14, while <= 10) for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, y_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2480 
// qmin(128) on the tester, for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, qmin) {
  for (size_t n = 1; n <= 10; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2489 
// qmax(128) on the tester, for batch sizes 1 through 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, qmax) {
  for (size_t n = 1; n <= 10; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2498 
// Single run of the scalar x4 kernel with batch_size 4.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_eq_4) {
  VMulMicrokernelTester()
    .batch_size(4)
    .Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
      xnn_init_qs8_mul_minmax_fp32_scalar_params,
      xnn_qs8_requantize_fp32);
}
2504 
// Batch sizes that are multiples of 4: 8, 12, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2512 
// Batch sizes below 4: 1 through 3.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2520 
// Batch sizes above 4 and below 8: 5 through 7.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VMulMicrokernelTester()
      .batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2528 
// inplace_a(true) on the tester, for batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, inplace_a) {
  for (size_t n = 1; n <= 20; n += 3) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2537 
// inplace_b(true) on the tester, for batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, inplace_b) {
  for (size_t n = 1; n <= 20; n += 3) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2546 
// Both inplace_a(true) and inplace_b(true), for batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, inplace_a_and_b) {
  for (size_t n = 1; n <= 20; n += 3) {
    VMulMicrokernelTester()
      .batch_size(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2556 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, a_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2567 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, b_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2578 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, y_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2589 
// Sweeps the tester's a_scale setting geometrically (start 0.1, factor 3.14,
// while <= 10) for each batch size 1, 4, ..., 19 on the scalar x4 kernel.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, a_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulMicrokernelTester();
      tester.batch_size(n);
      tester.a_scale(scale);
      tester.Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2600 
// Sweeps the tester's b_scale setting geometrically (start 0.1, factor 3.14,
// while <= 10) for each batch size 1, 4, ..., 19 on the scalar x4 kernel.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, b_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulMicrokernelTester();
      tester.batch_size(n);
      tester.b_scale(scale);
      tester.Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2611 
// Sweeps the tester's y_scale setting geometrically (start 0.1, factor 3.14,
// while <= 10) for each batch size 1, 4, ..., 19 on the scalar x4 kernel.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, y_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      auto tester = VMulMicrokernelTester();
      tester.batch_size(n);
      tester.y_scale(scale);
      tester.Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2622 
// Runs the scalar x4 QS8 multiply kernel with the tester's qmin set to 128,
// for batch sizes 1, 4, ..., 19.
// NOTE(review): how the tester maps qmin=128 onto the signed output range is
// defined in the tester, not here -- confirm there before changing the value.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, qmin) {
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VMulMicrokernelTester();
    tester.batch_size(n);
    tester.qmin(128);
    tester.Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}
2631 
// Runs the scalar x4 QS8 multiply kernel with the tester's qmax set to 128,
// for batch sizes 1, 4, ..., 19.
// NOTE(review): how the tester maps qmax=128 onto the signed output range is
// defined in the tester, not here -- confirm there before changing the value.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, qmax) {
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VMulMicrokernelTester();
    tester.batch_size(n);
    tester.qmax(128);
    tester.Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
        xnn_init_qs8_mul_minmax_fp32_scalar_params,
        xnn_qs8_requantize_fp32);
  }
}