1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/qs8-vmul-minmax-fp32.yaml
8 // Generator: tools/generate-vbinary-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/microparams-init.h>
17 #include <xnnpack/vmul.h>
18 #include "vmul-microkernel-tester.h"
19
20
21 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at batch_size == 8.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  VMulMicrokernelTester()
    .batch_size(8)
    .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
28
// Batch sizes 16, 24, ..., 72: multiples of 8 larger than 8.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
37
// Every batch size from 1 through 7.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
46
// Every batch size from 9 through 15.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
55
// Batch sizes 1, 8, ..., 36 with the tester's inplace_a flag set.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
65
// Batch sizes 1, 8, ..., 36 with the tester's inplace_b flag set.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
75
// Batch sizes 1, 8, ..., 36 with both inplace_a and inplace_b set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
86
// Batch sizes 1, 8, ..., 36 crossed with a_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
98
// Batch sizes 1, 8, ..., 36 crossed with b_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
110
// Batch sizes 1, 8, ..., 36 crossed with y_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
122
// Batch sizes 1, 8, ..., 36 crossed with a_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
134
// Batch sizes 1, 8, ..., 36 crossed with b_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
146
// Batch sizes 1, 8, ..., 36 crossed with y_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
158
// Batch sizes 1, 8, ..., 36 with the tester's qmin set to 128.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
168
// Batch sizes 1, 8, ..., 36 with the tester's qmax set to 128.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
178 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
179
180
181 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at batch_size == 16.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VMulMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
188
// Batch sizes 32, 48, ..., 144: multiples of 16 larger than 16.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
197
// Every batch size from 1 through 15.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
206
// Every batch size from 17 through 31.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
215
// Batch sizes 1, 16, ..., 76 with the tester's inplace_a flag set.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
225
// Batch sizes 1, 16, ..., 76 with the tester's inplace_b flag set.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
235
// Batch sizes 1, 16, ..., 76 with both inplace_a and inplace_b set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
246
// Batch sizes 1, 16, ..., 76 crossed with a_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
258
// Batch sizes 1, 16, ..., 76 crossed with b_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
270
// Batch sizes 1, 16, ..., 76 crossed with y_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
282
// Batch sizes 1, 16, ..., 76 crossed with a_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
294
// Batch sizes 1, 16, ..., 76 crossed with b_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
306
// Batch sizes 1, 16, ..., 76 crossed with y_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
318
// Batch sizes 1, 16, ..., 76 with the tester's qmin set to 128.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
328
// Batch sizes 1, 16, ..., 76 with the tester's qmax set to 128.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
338 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
339
340
341 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at batch_size == 16.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VMulMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
348
// Batch sizes 32, 48, ..., 144: multiples of 16 larger than 16.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
357
// Every batch size from 1 through 15.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
366
// Every batch size from 17 through 31.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
375
// Batch sizes 1, 16, ..., 76 with the tester's inplace_a flag set.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
385
// Batch sizes 1, 16, ..., 76 with the tester's inplace_b flag set.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
395
// Batch sizes 1, 16, ..., 76 with both inplace_a and inplace_b set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
406
// Batch sizes 1, 16, ..., 76 crossed with a_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
418
// Batch sizes 1, 16, ..., 76 crossed with b_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
430
// Batch sizes 1, 16, ..., 76 crossed with y_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
442
// Batch sizes 1, 16, ..., 76 crossed with a_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
454
// Batch sizes 1, 16, ..., 76 crossed with b_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
466
// Batch sizes 1, 16, ..., 76 crossed with y_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
478
// Batch sizes 1, 16, ..., 76 with the tester's qmin set to 128.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
488
// Batch sizes 1, 16, ..., 76 with the tester's qmax set to 128.
TEST(QS8_VMUL_MINMAX_FP32__NEON_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16, xnn_init_qs8_mul_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
498 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
499
500
501 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at batch_size == 8.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  VMulMicrokernelTester()
    .batch_size(8)
    .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
508
// Batch sizes 16, 24, ..., 72: multiples of 8 larger than 8.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
517
// Every batch size from 1 through 7.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
526
// Every batch size from 9 through 15.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
535
// Batch sizes 1, 8, ..., 36 with the tester's inplace_a flag set.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, inplace_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
545
// Batch sizes 1, 8, ..., 36 with the tester's inplace_b flag set.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, inplace_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
555
// Batch sizes 1, 8, ..., 36 with both inplace_a and inplace_b set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
566
// Batch sizes 1, 8, ..., 36 crossed with a_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
578
// Batch sizes 1, 8, ..., 36 crossed with b_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
590
// Batch sizes 1, 8, ..., 36 crossed with y_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_zero_point(y_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
602
// Batch sizes 1, 8, ..., 36 crossed with a_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_scale(a_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
614
// Batch sizes 1, 8, ..., 36 crossed with b_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_scale(b_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
626
// Batch sizes 1, 8, ..., 36 crossed with y_scale starting at 0.1 and multiplied by 3.14 while <= 10.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .y_scale(y_scale)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
638
// Batch sizes 1, 8, ..., 36 with the tester's qmin set to 128.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmin(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
648
// Batch sizes 1, 8, ..., 36 with the tester's qmax set to 128.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .qmax(128)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
658 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
659
660
661 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at batch_size == 16.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  VMulMicrokernelTester()
    .batch_size(16)
    .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
668
// Batch sizes 32, 48, ..., 144: multiples of 16 larger than 16.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
677
// Every batch size from 1 through 15.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
686
// Every batch size from 17 through 31.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
695
// Batch sizes 1, 16, ..., 76 with the tester's inplace_a flag set.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
705
// Batch sizes 1, 16, ..., 76 with the tester's inplace_b flag set.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
715
// Batch sizes 1, 16, ..., 76 with both inplace_a and inplace_b set on the tester.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VMulMicrokernelTester()
      .batch_size(batch_size)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
726
// Batch sizes 1, 16, ..., 76 crossed with a_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .a_zero_point(a_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
738
// Batch sizes 1, 16, ..., 76 crossed with b_zero_point in {-128, -77, -26, 25, 76, 127}.
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
      VMulMicrokernelTester()
        .batch_size(batch_size)
        .b_zero_point(b_zero_point)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16, xnn_init_qs8_mul_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
750
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Cross batch sizes 1, 16, 31, 46, 61, 76 with y_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).y_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
762
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, 31, 46, 61, 76: a_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
774
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, 31, 46, 61, 76: b_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
786
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, 31, 46, 61, 76: y_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
798
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Batch sizes 1, 16, 31, 46, 61 and 76, each run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmin(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
808
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Batch sizes 1, 16, 31, 46, 61 and 76, each run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmax(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
818 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
819
820
821 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run with the batch size fixed at 16.
  VMulMicrokernelTester().batch_size(16)
    .Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
      xnn_init_qs8_mul_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
828
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Visit every multiple of 16 from 32 up to, but excluding, 160.
  for (size_t n = 32; n < 160; n += 16) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
837
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Visit every batch size from 1 through 15.
  for (size_t n = 1; n < 16; ++n) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
846
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Visit every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
855
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, inplace_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Batch sizes 1, 16, 31, 46, 61 and 76, each run with inplace_a(true) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
865
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, inplace_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Batch sizes 1, 16, 31, 46, 61 and 76, each run with inplace_b(true) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
875
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, inplace_a_and_b) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Batch sizes 1, 16, 31, 46, 61 and 76, with both inplace_a(true) and inplace_b(true) set.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
886
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Cross batch sizes 1, 16, 31, 46, 61, 76 with a_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).a_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
898
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Cross batch sizes 1, 16, 31, 46, 61, 76 with b_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).b_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
910
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Cross batch sizes 1, 16, 31, 46, 61, 76 with y_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).y_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
922
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, 31, 46, 61, 76: a_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
934
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, 31, 46, 61, 76: b_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
946
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For batch sizes 1, 16, 31, 46, 61, 76: y_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
          xnn_init_qs8_mul_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
958
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Batch sizes 1, 16, 31, 46, 61 and 76, each run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmin(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
968
TEST(QS8_VMUL_MINMAX_FP32__NEONV8_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Batch sizes 1, 16, 31, 46, 61 and 76, each run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmax(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16,
        xnn_init_qs8_mul_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
978 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
979
980
981 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single run with the batch size fixed at 8.
  VMulMicrokernelTester().batch_size(8)
    .Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
988
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Visit every multiple of 8 from 16 up to, but excluding, 80.
  for (size_t n = 16; n < 80; n += 8) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
997
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Visit every batch size from 1 through 7.
  for (size_t n = 1; n < 8; ++n) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1006
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Visit every batch size from 9 through 15.
  for (size_t n = 9; n < 16; ++n) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1015
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 8, 15, 22, 29 and 36, each run with inplace_a(true) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1025
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 8, 15, 22, 29 and 36, each run with inplace_b(true) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1035
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 8, 15, 22, 29 and 36, with both inplace_a(true) and inplace_b(true) set.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1046
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Cross batch sizes 1, 8, 15, 22, 29, 36 with a_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).a_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1058
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Cross batch sizes 1, 8, 15, 22, 29, 36 with b_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).b_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1070
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Cross batch sizes 1, 8, 15, 22, 29, 36 with y_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).y_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1082
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 8, 15, 22, 29, 36: a_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1094
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 8, 15, 22, 29, 36: b_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1106
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 8, 15, 22, 29, 36: y_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1118
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 8, 15, 22, 29 and 36, each run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).qmin(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1128
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 8, 15, 22, 29 and 36, each run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).qmax(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1138 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1139
1140
1141 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  // Single run with the batch size fixed at 16.
  VMulMicrokernelTester().batch_size(16)
    .Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
1148
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  // Visit every multiple of 16 from 32 up to, but excluding, 160.
  for (size_t n = 32; n < 160; n += 16) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1157
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  // Visit every batch size from 1 through 15.
  for (size_t n = 1; n < 16; ++n) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1166
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  // Visit every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1175
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 16, 31, 46, 61 and 76, each run with inplace_a(true) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1185
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 16, 31, 46, 61 and 76, each run with inplace_b(true) set on the tester.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1195
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 16, 31, 46, 61 and 76, with both inplace_a(true) and inplace_b(true) set.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1206
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Cross batch sizes 1, 16, 31, 46, 61, 76 with a_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).a_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1218
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Cross batch sizes 1, 16, 31, 46, 61, 76 with b_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).b_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1230
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // Cross batch sizes 1, 16, 31, 46, 61, 76 with y_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).y_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1242
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 16, 31, 46, 61, 76: a_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1254
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 16, 31, 46, 61, 76: b_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1266
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  // For batch sizes 1, 16, 31, 46, 61, 76: y_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qs8_mul_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1278
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 16, 31, 46, 61 and 76, each run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmin(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1288
TEST(QS8_VMUL_MINMAX_FP32__SSE2_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Batch sizes 1, 16, 31, 46, 61 and 76, each run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester().batch_size(n).qmax(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_mul_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
1298 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1299
1300
1301 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Single run with the batch size fixed at 8.
  VMulMicrokernelTester().batch_size(8)
    .Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qs8_mul_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
1308
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // Visit every multiple of 8 from 16 up to, but excluding, 80.
  for (size_t n = 16; n < 80; n += 8) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1317
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Visit every batch size from 1 through 7.
  for (size_t n = 1; n < 8; ++n) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1326
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Visit every batch size from 9 through 15.
  for (size_t n = 9; n < 16; ++n) {
    VMulMicrokernelTester().batch_size(n)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1335
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 8, 15, 22, 29 and 36, each run with inplace_a(true) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1345
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 8, 15, 22, 29 and 36, each run with inplace_b(true) set on the tester.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1355
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 8, 15, 22, 29 and 36, with both inplace_a(true) and inplace_b(true) set.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).inplace_a(true).inplace_b(true)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1366
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Cross batch sizes 1, 8, 15, 22, 29, 36 with a_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).a_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1378
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Cross batch sizes 1, 8, 15, 22, 29, 36 with b_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).b_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1390
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  // Cross batch sizes 1, 8, 15, 22, 29, 36 with y_zero_point values -128, -77, -26, 25, 76, 127.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester().batch_size(n).y_zero_point(zero_point)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1402
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 8, 15, 22, 29, 36: a_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).a_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1414
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 8, 15, 22, 29, 36: b_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).b_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1426
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  // For batch sizes 1, 8, 15, 22, 29, 36: y_scale starts at 0.1f and is
  // multiplied by 3.14f for as long as it does not exceed 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester().batch_size(n).y_scale(scale)
        .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qs8_mul_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1438
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 8, 15, 22, 29 and 36, each run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).qmin(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1448
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Batch sizes 1, 8, 15, 22, 29 and 36, each run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester().batch_size(n).qmax(128)
      .Test(
        xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_mul_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
1458 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1459
1460
1461 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  // Single run with the batch size fixed at 16.
  VMulMicrokernelTester().batch_size(16)
    .Test(
      xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16,
      xnn_init_qs8_mul_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
1468
// Runs the microkernel test for batch sizes 32, 48, ..., 144 (step 16).
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 32; n < 160; n += 16) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1477
// Runs the microkernel test for every batch size from 1 through 15.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 16; n++) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1486
// Runs the microkernel test for every batch size from 17 through 31.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 17; n < 32; n++) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1495
// Runs the microkernel test with inplace_a enabled for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_a(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1505
// Runs the microkernel test with inplace_b enabled for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1515
// Runs the microkernel test with both inplace_a and inplace_b enabled for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1526
// Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1538
// Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1550
// Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1562
// Sweeps a_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .a_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1574
// Sweeps b_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .b_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1586
// Sweeps y_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .y_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1598
// Runs the microkernel test with qmin set to 128 for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .qmin(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1608
// Runs the microkernel test with qmax set to 128 for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__SSE41_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .qmax(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1618 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1619
1620
1621 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the microkernel test once with batch_size set to 8.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VMulMicrokernelTester tester;
  tester.batch_size(8)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
1628
// Runs the microkernel test for batch sizes 16, 24, ..., 72 (step 8).
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1637
// Runs the microkernel test for every batch size from 1 through 7.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; n++) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1646
// Runs the microkernel test for every batch size from 9 through 15.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; n++) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1655
// Runs the microkernel test with inplace_a enabled for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_a(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1665
// Runs the microkernel test with inplace_b enabled for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1675
// Runs the microkernel test with both inplace_a and inplace_b enabled for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1686
// Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1698
// Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1710
// Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1722
// Sweeps a_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .a_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1734
// Sweeps b_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .b_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1746
// Sweeps y_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .y_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1758
// Runs the microkernel test with qmin set to 128 for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .qmin(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1768
// Runs the microkernel test with qmax set to 128 for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .qmax(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1778 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1779
1780
1781 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the microkernel test once with batch_size set to 16.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VMulMicrokernelTester tester;
  tester.batch_size(16)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
1788
// Runs the microkernel test for batch sizes 32, 48, ..., 144 (step 16).
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 32; n < 160; n += 16) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1797
// Runs the microkernel test for every batch size from 1 through 15.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; n++) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1806
// Runs the microkernel test for every batch size from 17 through 31.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; n++) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1815
// Runs the microkernel test with inplace_a enabled for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_a(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1825
// Runs the microkernel test with inplace_b enabled for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1835
// Runs the microkernel test with both inplace_a and inplace_b enabled for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, inplace_a_and_b) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1846
// Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1858
// Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1870
// Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1882
// Sweeps a_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .a_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1894
// Sweeps b_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .b_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1906
// Sweeps y_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .y_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
1918
// Runs the microkernel test with qmin set to 128 for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .qmin(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1928
// Runs the microkernel test with qmax set to 128 for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__AVX_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .qmax(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_mul_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
1938 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1939
1940
1941 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the microkernel test once with batch_size set to 8.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_eq_8) {
  VMulMicrokernelTester tester;
  tester.batch_size(8)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
1947
// Runs the microkernel test for batch sizes 16, 24, ..., 72 (step 8).
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_div_8) {
  for (size_t n = 16; n < 80; n += 8) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1955
// Runs the microkernel test for every batch size from 1 through 7.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_lt_8) {
  for (size_t n = 1; n < 8; n++) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1963
// Runs the microkernel test for every batch size from 9 through 15.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, batch_gt_8) {
  for (size_t n = 9; n < 16; n++) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1971
// Runs the microkernel test with inplace_a enabled for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace_a) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_a(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1980
// Runs the microkernel test with inplace_b enabled for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace_b) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1989
// Runs the microkernel test with both inplace_a and inplace_b enabled for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, inplace_a_and_b) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
1999
// Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, a_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
2010
// Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, b_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
2021
// Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, y_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
2032
// Sweeps a_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, a_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .a_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
2043
// Sweeps b_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, b_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .b_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
2054
// Sweeps y_scale from 0.1 upward by a factor of 3.14 (while <= 10) for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, y_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .y_scale(scale)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
2065
// Runs the microkernel test with qmin set to 128 for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, qmin) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .qmin(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
2074
// Runs the microkernel test with qmax set to 128 for batch sizes 1, 8, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X8, qmax) {
  for (size_t n = 1; n <= 40; n += 7) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .qmax(128)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
2083 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2084
2085
2086 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the microkernel test once with batch_size set to 16.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_eq_16) {
  VMulMicrokernelTester tester;
  tester.batch_size(16)
      .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
2092
// Runs the microkernel test for batch sizes 32, 48, ..., 144 (step 16).
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_div_16) {
  for (size_t n = 32; n < 160; n += 16) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
2100
// Runs the microkernel test for every batch size from 1 through 15.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_lt_16) {
  for (size_t n = 1; n < 16; n++) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
2108
// Runs the microkernel test for every batch size from 17 through 31.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, batch_gt_16) {
  for (size_t n = 17; n < 32; n++) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
2116
// Runs the microkernel test with inplace_a enabled for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace_a) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_a(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
2125
// Runs the microkernel test with inplace_b enabled for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace_b) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
2134
// Runs the microkernel test with both inplace_a and inplace_b enabled for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, inplace_a_and_b) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester tester;
    tester.batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
2144
// Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for batch sizes 1, 16, ..., 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, a_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VMulMicrokernelTester tester;
      tester.batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16, xnn_init_qs8_mul_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
2155
// For batch sizes 1, 16, 31, 46, 61 and 76, runs the WAsm SIMD x16 kernel
// with b_zero_point set to -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, b_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
              xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2166
// For batch sizes 1, 16, 31, 46, 61 and 76, runs the WAsm SIMD x16 kernel
// with y_zero_point set to -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, y_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
              xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2177
// For batch sizes 1, 16, 31, 46, 61 and 76, runs the WAsm SIMD x16 kernel
// with a_scale starting at 0.1f and multiplied by 3.14f per step while it
// stays <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, a_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
              xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2188
// For batch sizes 1, 16, 31, 46, 61 and 76, runs the WAsm SIMD x16 kernel
// with b_scale starting at 0.1f and multiplied by 3.14f per step while it
// stays <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, b_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
              xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2199
// For batch sizes 1, 16, 31, 46, 61 and 76, runs the WAsm SIMD x16 kernel
// with y_scale starting at 0.1f and multiplied by 3.14f per step while it
// stays <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, y_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
              xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2210
// Runs the WAsm SIMD x16 kernel with the tester's qmin set to 128 for batch
// sizes 1, 16, 31, 46, 61 and 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, qmin) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
2219
// Runs the WAsm SIMD x16 kernel with the tester's qmax set to 128 for batch
// sizes 1, 16, 31, 46, 61 and 76.
TEST(QS8_VMUL_MINMAX_FP32__WASMSIMD_MUL32_LD64_X16, qmax) {
  for (size_t n = 1; n <= 80; n += 15) {
    VMulMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16,
            xnn_init_qs8_mul_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
2228 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2229
2230
// Runs the scalar x1 kernel once through VMulMicrokernelTester with a batch
// size of exactly 1 and no other tester options set.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, batch_eq_1) {
  VMulMicrokernelTester()
      .batch_size(1)
      .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
}
2236
// Sweeps batch sizes 2 through 9 and runs the scalar x1 kernel for each one.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, batch_gt_1) {
  for (size_t n = 2; n < 10; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2244
// Runs the scalar x1 kernel with inplace_a enabled for batch sizes 1 to 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, inplace_a) {
  for (size_t n = 1; n <= 5; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2253
// Runs the scalar x1 kernel with inplace_b enabled for batch sizes 1 to 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, inplace_b) {
  for (size_t n = 1; n <= 5; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2262
// Runs the scalar x1 kernel with both inplace_a and inplace_b enabled for
// batch sizes 1 to 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, inplace_a_and_b) {
  for (size_t n = 1; n <= 5; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2272
// For batch sizes 1 to 5, runs the scalar x1 kernel with a_zero_point set to
// -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, a_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2283
// For batch sizes 1 to 5, runs the scalar x1 kernel with b_zero_point set to
// -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, b_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2294
// For batch sizes 1 to 5, runs the scalar x1 kernel with y_zero_point set to
// -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, y_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2305
// For batch sizes 1 to 5, runs the scalar x1 kernel with a_scale starting at
// 0.1f and multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, a_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2316
// For batch sizes 1 to 5, runs the scalar x1 kernel with b_scale starting at
// 0.1f and multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, b_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2327
// For batch sizes 1 to 5, runs the scalar x1 kernel with y_scale starting at
// 0.1f and multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, y_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2338
// Runs the scalar x1 kernel with the tester's qmin set to 128 for batch
// sizes 1 to 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, qmin) {
  for (size_t n = 1; n <= 5; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2347
// Runs the scalar x1 kernel with the tester's qmax set to 128 for batch
// sizes 1 to 5.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X1, qmax) {
  for (size_t n = 1; n <= 5; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2356
// Runs the scalar x2 kernel once through VMulMicrokernelTester with a batch
// size of exactly 2 and no other tester options set.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_eq_2) {
  VMulMicrokernelTester()
      .batch_size(2)
      .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
}
2362
// Runs the scalar x2 kernel for the even batch sizes 4, 6, ..., 18.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_div_2) {
  for (size_t n = 4; n < 20; n += 2) {
    VMulMicrokernelTester()
        .batch_size(n)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2370
// Runs the scalar x2 kernel for every batch size below 2; the loop bounds
// admit only batch size 1.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_lt_2) {
  for (size_t n = 1; n < 2; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2378
// Runs the scalar x2 kernel for batch sizes from 3 up to (excluding) 4; the
// loop bounds admit only batch size 3.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, batch_gt_2) {
  for (size_t n = 3; n < 4; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2386
// Runs the scalar x2 kernel with inplace_a enabled for batch sizes 1 to 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, inplace_a) {
  for (size_t n = 1; n <= 10; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2395
// Runs the scalar x2 kernel with inplace_b enabled for batch sizes 1 to 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, inplace_b) {
  for (size_t n = 1; n <= 10; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2404
// Runs the scalar x2 kernel with both inplace_a and inplace_b enabled for
// batch sizes 1 to 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, inplace_a_and_b) {
  for (size_t n = 1; n <= 10; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2414
// For batch sizes 1 to 10, runs the scalar x2 kernel with a_zero_point set
// to -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, a_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2425
// For batch sizes 1 to 10, runs the scalar x2 kernel with b_zero_point set
// to -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, b_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2436
// For batch sizes 1 to 10, runs the scalar x2 kernel with y_zero_point set
// to -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, y_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2447
// For batch sizes 1 to 10, runs the scalar x2 kernel with a_scale starting
// at 0.1f and multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, a_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2458
// For batch sizes 1 to 10, runs the scalar x2 kernel with b_scale starting
// at 0.1f and multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, b_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2469
// For batch sizes 1 to 10, runs the scalar x2 kernel with y_scale starting
// at 0.1f and multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, y_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2480
// Runs the scalar x2 kernel with the tester's qmin set to 128 for batch
// sizes 1 to 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, qmin) {
  for (size_t n = 1; n <= 10; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2489
// Runs the scalar x2 kernel with the tester's qmax set to 128 for batch
// sizes 1 to 10.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X2, qmax) {
  for (size_t n = 1; n <= 10; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2498
// Runs the scalar x4 kernel once through VMulMicrokernelTester with a batch
// size of exactly 4 and no other tester options set.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_eq_4) {
  VMulMicrokernelTester()
      .batch_size(4)
      .Test(
          xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
          xnn_init_qs8_mul_minmax_fp32_scalar_params,
          xnn_qs8_requantize_fp32);
}
2504
// Runs the scalar x4 kernel for the multiples of four 8, 12, ..., 36.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VMulMicrokernelTester()
        .batch_size(n)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2512
// Runs the scalar x4 kernel for batch sizes 1, 2 and 3.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2520
// Runs the scalar x4 kernel for batch sizes 5, 6 and 7.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VMulMicrokernelTester()
        .batch_size(n)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2528
// Runs the scalar x4 kernel with inplace_a enabled for batch sizes
// 1, 4, 7, 10, 13, 16 and 19 (start at 1, step 3, stop once above 20).
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, inplace_a) {
  for (size_t n = 1; n <= 20; n += 3) {
    VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2537
// Runs the scalar x4 kernel with inplace_b enabled for batch sizes
// 1, 4, 7, 10, 13, 16 and 19 (start at 1, step 3, stop once above 20).
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, inplace_b) {
  for (size_t n = 1; n <= 20; n += 3) {
    VMulMicrokernelTester()
        .batch_size(n)
        .inplace_b(true)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2546
// Runs the scalar x4 kernel with both inplace_a and inplace_b enabled for
// batch sizes 1, 4, 7, 10, 13, 16 and 19.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, inplace_a_and_b) {
  for (size_t n = 1; n <= 20; n += 3) {
    VMulMicrokernelTester()
        .batch_size(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2556
// For batch sizes 1, 4, 7, 10, 13, 16 and 19, runs the scalar x4 kernel with
// a_zero_point set to -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, a_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2567
// For batch sizes 1, 4, 7, 10, 13, 16 and 19, runs the scalar x4 kernel with
// b_zero_point set to -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, b_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2578
// For batch sizes 1, 4, 7, 10, 13, 16 and 19, runs the scalar x4 kernel with
// y_zero_point set to -128, -77, -26, 25, 76 and 127 in turn.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, y_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VMulMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2589
// For batch sizes 1, 4, 7, 10, 13, 16 and 19, runs the scalar x4 kernel with
// a_scale starting at 0.1f and multiplied by 3.14f per step while it stays
// <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, a_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2600
// For batch sizes 1, 4, 7, 10, 13, 16 and 19, runs the scalar x4 kernel with
// b_scale starting at 0.1f and multiplied by 3.14f per step while it stays
// <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, b_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2611
// For batch sizes 1, 4, 7, 10, 13, 16 and 19, runs the scalar x4 kernel with
// y_scale starting at 0.1f and multiplied by 3.14f per step while it stays
// <= 10.0f.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, y_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VMulMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(
              xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
              xnn_init_qs8_mul_minmax_fp32_scalar_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2622
// Runs the scalar x4 kernel with the tester's qmin set to 128 for batch
// sizes 1, 4, 7, 10, 13, 16 and 19.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, qmin) {
  for (size_t n = 1; n <= 20; n += 3) {
    VMulMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}
2631
// Runs the scalar x4 kernel with the tester's qmax set to 128 for batch
// sizes 1, 4, 7, 10, 13, 16 and 19.
TEST(QS8_VMUL_MINMAX_FP32__SCALAR_X4, qmax) {
  for (size_t n = 1; n <= 20; n += 3) {
    VMulMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(
            xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4,
            xnn_init_qs8_mul_minmax_fp32_scalar_params,
            xnn_qs8_requantize_fp32);
  }
}