// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qs8-vaddc-minmax.yaml
//   Generator: tools/generate-vbinary-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/microparams-init.h>
17 #include <xnnpack/vadd.h>
18 #include "vaddc-microkernel-tester.h"
19
20
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8. Every case
  // drives the kernel through VAddCMicrokernelTester using
  // xnn_init_qs8_add_minmax_neon_params, and starts with
  // TEST_REQUIRES_ARM_NEON (presumably a runtime NEON availability guard --
  // confirm in xnnpack/isa-checks.h).

  // Single run with batch_size == 8.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VAddCMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8 above 8).
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Every batch size from 1 through 7.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Every batch size from 9 through 15.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1, 8, ..., 36 (step 7) with the tester's inplace flag enabled.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps a_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Runs with qmin(128).
  // NOTE(review): 128 does not fit in int8; presumably the tester takes an
  // unsigned value and rebases it for QS8 -- confirm in
  // vaddc-microkernel-tester.h.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Runs with qmax(128); see the review note on the qmin test about the
  // value's range.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
158
159
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16. Every case
  // drives the kernel through VAddCMicrokernelTester using
  // xnn_init_qs8_add_minmax_neon_params, and starts with
  // TEST_REQUIRES_ARM_NEON (presumably a runtime NEON availability guard --
  // confirm in xnnpack/isa-checks.h).

  // Single run with batch_size == 16.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VAddCMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16 above 16).
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Every batch size from 1 through 15.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Every batch size from 17 through 31.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's inplace flag enabled.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps a_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Runs with qmin(128).
  // NOTE(review): 128 does not fit in int8; presumably the tester takes an
  // unsigned value and rebases it for QS8 -- confirm in
  // vaddc-microkernel-tester.h.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Runs with qmax(128); see the review note on the qmin test about the
  // value's range.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
297
298
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24. Every case
  // drives the kernel through VAddCMicrokernelTester using
  // xnn_init_qs8_add_minmax_neon_params, and starts with
  // TEST_REQUIRES_ARM_NEON (presumably a runtime NEON availability guard --
  // confirm in xnnpack/isa-checks.h).

  // Single run with batch_size == 24.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_eq_24) {
    TEST_REQUIRES_ARM_NEON;
    VAddCMicrokernelTester()
      .batch_size(24)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }

  // Batch sizes 48, 72, ..., 216 (multiples of 24 above 24).
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_div_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Every batch size from 1 through 23.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_lt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 24; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Every batch size from 25 through 47.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_gt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 25; batch_size < 48; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1, 24, ..., 116 (step 23) with the tester's inplace flag enabled.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps a_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Runs with qmin(128).
  // NOTE(review): 128 does not fit in int8; presumably the tester takes an
  // unsigned value and rebases it for QS8 -- confirm in
  // vaddc-microkernel-tester.h.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Runs with qmax(128); see the review note on the qmin test about the
  // value's range.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
436
437
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32. Every case
  // drives the kernel through VAddCMicrokernelTester using
  // xnn_init_qs8_add_minmax_neon_params, and starts with
  // TEST_REQUIRES_ARM_NEON (presumably a runtime NEON availability guard --
  // confirm in xnnpack/isa-checks.h).

  // Single run with batch_size == 32.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_eq_32) {
    TEST_REQUIRES_ARM_NEON;
    VAddCMicrokernelTester()
      .batch_size(32)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }

  // Batch sizes 64, 96, ..., 288 (multiples of 32 above 32).
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_div_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Every batch size from 1 through 31.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_lt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Every batch size from 33 through 63.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_gt_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1, 32, ..., 156 (step 31) with the tester's inplace flag enabled.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps a_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Runs with qmin(128).
  // NOTE(review): 128 does not fit in int8; presumably the tester takes an
  // unsigned value and rebases it for QS8 -- confirm in
  // vaddc-microkernel-tester.h.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Runs with qmax(128); see the review note on the qmin test about the
  // value's range.
  TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
575
576
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16. Every case
  // drives the kernel through VAddCMicrokernelTester using
  // xnn_init_qs8_add_minmax_neon_params, and starts with
  // TEST_REQUIRES_ARM_NEON (presumably a runtime NEON availability guard --
  // confirm in xnnpack/isa-checks.h).

  // Single run with batch_size == 16.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VAddCMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16 above 16).
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Every batch size from 1 through 15.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Every batch size from 17 through 31.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Batch sizes 1, 16, ..., 76 (step 15) with the tester's inplace flag enabled.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, ..., 127 (step 51) for each batch size.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps a_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps b_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Sweeps y_scale geometrically: starts at 0.1, multiplies by 3.14 while <= 10.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
      }
    }
  }

  // Runs with qmin(128).
  // NOTE(review): 128 does not fit in int8; presumably the tester takes an
  // unsigned value and rebases it for QS8 -- confirm in
  // vaddc-microkernel-tester.h.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }

  // Runs with qmax(128); see the review note on the qmin test about the
  // value's range.
  TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
714
715
716 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32,batch_eq_32)717 TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, batch_eq_32) {
718 TEST_REQUIRES_ARM_NEON;
719 VAddCMicrokernelTester()
720 .batch_size(32)
721 .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
722 }
723
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32,batch_div_32)724 TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, batch_div_32) {
725 TEST_REQUIRES_ARM_NEON;
726 for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
727 VAddCMicrokernelTester()
728 .batch_size(batch_size)
729 .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
730 }
731 }
732
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32,batch_lt_32)733 TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, batch_lt_32) {
734 TEST_REQUIRES_ARM_NEON;
735 for (size_t batch_size = 1; batch_size < 32; batch_size++) {
736 VAddCMicrokernelTester()
737 .batch_size(batch_size)
738 .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
739 }
740 }
741
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32,batch_gt_32)742 TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, batch_gt_32) {
743 TEST_REQUIRES_ARM_NEON;
744 for (size_t batch_size = 33; batch_size < 64; batch_size++) {
745 VAddCMicrokernelTester()
746 .batch_size(batch_size)
747 .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
748 }
749 }
750
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32,inplace)751 TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, inplace) {
752 TEST_REQUIRES_ARM_NEON;
753 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
754 VAddCMicrokernelTester()
755 .batch_size(batch_size)
756 .inplace(true)
757 .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
758 }
759 }
760
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32,a_zero_point)761 TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, a_zero_point) {
762 TEST_REQUIRES_ARM_NEON;
763 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
764 for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
765 VAddCMicrokernelTester()
766 .batch_size(batch_size)
767 .a_zero_point(a_zero_point)
768 .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
769 }
770 }
771 }
772
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32,b_zero_point)773 TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, b_zero_point) {
774 TEST_REQUIRES_ARM_NEON;
775 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
776 for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
777 VAddCMicrokernelTester()
778 .batch_size(batch_size)
779 .b_zero_point(b_zero_point)
780 .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
781 }
782 }
783 }
784
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32,y_zero_point)785 TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, y_zero_point) {
786 TEST_REQUIRES_ARM_NEON;
787 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
788 for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
789 VAddCMicrokernelTester()
790 .batch_size(batch_size)
791 .y_zero_point(y_zero_point)
792 .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
793 }
794 }
795 }
796
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32,a_scale)797 TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, a_scale) {
798 TEST_REQUIRES_ARM_NEON;
799 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
800 for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
801 VAddCMicrokernelTester()
802 .batch_size(batch_size)
803 .a_scale(a_scale)
804 .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
805 }
806 }
807 }
808
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32,b_scale)809 TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, b_scale) {
810 TEST_REQUIRES_ARM_NEON;
811 for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
812 for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
813 VAddCMicrokernelTester()
814 .batch_size(batch_size)
815 .b_scale(b_scale)
816 .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
817 }
818 }
819 }
820
// Runs the neon_ld128_x32 microkernel through the tester for batch sizes
// 1..160 (step 31); y_scale starts at 0.1f and is multiplied by 3.14f while
// it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32,
        xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
832
// Runs the neon_ld128_x32 microkernel through the tester with qmin set to 128
// for batch sizes 1..160 (step 31).
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32,
      xnn_init_qs8_add_minmax_neon_params);
  }
}
842
// Runs the neon_ld128_x32 microkernel through the tester with qmax set to 128
// for batch sizes 1..160 (step 31).
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32,
      xnn_init_qs8_add_minmax_neon_params);
  }
}
852 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
853
854
855 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the sse2_mul16_ld64_x8 microkernel through the tester with a
// batch_size of exactly 8.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  VAddCMicrokernelTester tester;
  tester.batch_size(8).Test(
    xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
    xnn_init_qs8_add_minmax_sse2_params);
}
862
// Runs the sse2_mul16_ld64_x8 microkernel through the tester for batch sizes
// 16, 24, ..., 72 (multiples of 8 below 80).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
871
// Runs the sse2_mul16_ld64_x8 microkernel through the tester for every batch
// size from 1 through 7.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
880
// Runs the sse2_mul16_ld64_x8 microkernel through the tester for every batch
// size from 9 through 15.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
889
// Runs the sse2_mul16_ld64_x8 microkernel through the tester with
// inplace(true) for batch sizes 1..40 (step 7).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
899
// Runs the sse2_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7), sweeping a_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
911
// Runs the sse2_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7), sweeping b_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
923
// Runs the sse2_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7), sweeping y_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
935
// Runs the sse2_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7); a_scale starts at 0.1f and is multiplied by 3.14f while it
// stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
947
// Runs the sse2_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7); b_scale starts at 0.1f and is multiplied by 3.14f while it
// stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
959
// Runs the sse2_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7); y_scale starts at 0.1f and is multiplied by 3.14f while it
// stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
971
// Runs the sse2_mul16_ld64_x8 microkernel through the tester with qmin set to
// 128 for batch sizes 1..40 (step 7).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
981
// Runs the sse2_mul16_ld64_x8 microkernel through the tester with qmax set to
// 128 for batch sizes 1..40 (step 7).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
991 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
992
993
994 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the sse2_mul16_ld64_x16 microkernel through the tester with a
// batch_size of exactly 16.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  VAddCMicrokernelTester tester;
  tester.batch_size(16).Test(
    xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
    xnn_init_qs8_add_minmax_sse2_params);
}
1001
// Runs the sse2_mul16_ld64_x16 microkernel through the tester for batch sizes
// 32, 48, ..., 144 (multiples of 16 below 160).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1010
// Runs the sse2_mul16_ld64_x16 microkernel through the tester for every batch
// size from 1 through 15.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1019
// Runs the sse2_mul16_ld64_x16 microkernel through the tester for every batch
// size from 17 through 31.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1028
// Runs the sse2_mul16_ld64_x16 microkernel through the tester with
// inplace(true) for batch sizes 1..80 (step 15).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1038
// Runs the sse2_mul16_ld64_x16 microkernel through the tester for batch sizes
// 1..80 (step 15), sweeping a_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1050
// Runs the sse2_mul16_ld64_x16 microkernel through the tester for batch sizes
// 1..80 (step 15), sweeping b_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1062
// Runs the sse2_mul16_ld64_x16 microkernel through the tester for batch sizes
// 1..80 (step 15), sweeping y_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1074
// Runs the sse2_mul16_ld64_x16 microkernel through the tester for batch sizes
// 1..80 (step 15); a_scale starts at 0.1f and is multiplied by 3.14f while it
// stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1086
// Runs the sse2_mul16_ld64_x16 microkernel through the tester for batch sizes
// 1..80 (step 15); b_scale starts at 0.1f and is multiplied by 3.14f while it
// stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1098
// Runs the sse2_mul16_ld64_x16 microkernel through the tester for batch sizes
// 1..80 (step 15); y_scale starts at 0.1f and is multiplied by 3.14f while it
// stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1110
// Runs the sse2_mul16_ld64_x16 microkernel through the tester with qmin set
// to 128 for batch sizes 1..80 (step 15).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1120
// Runs the sse2_mul16_ld64_x16 microkernel through the tester with qmax set
// to 128 for batch sizes 1..80 (step 15).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1130 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1131
1132
1133 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the sse2_mul16_ld64_x24 microkernel through the tester with a
// batch_size of exactly 24.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  VAddCMicrokernelTester tester;
  tester.batch_size(24).Test(
    xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
    xnn_init_qs8_add_minmax_sse2_params);
}
1140
// Runs the sse2_mul16_ld64_x24 microkernel through the tester for batch sizes
// 48, 72, ..., 216 (multiples of 24 below 240).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 48; n < 240; n += 24) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1149
// Runs the sse2_mul16_ld64_x24 microkernel through the tester for every batch
// size from 1 through 23.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 24; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1158
// Runs the sse2_mul16_ld64_x24 microkernel through the tester for every batch
// size from 25 through 47.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 25; n < 48; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1167
// Runs the sse2_mul16_ld64_x24 microkernel through the tester with
// inplace(true) for batch sizes 1..120 (step 23).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1177
// Runs the sse2_mul16_ld64_x24 microkernel through the tester for batch sizes
// 1..120 (step 23), sweeping a_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1189
// Runs the sse2_mul16_ld64_x24 microkernel through the tester for batch sizes
// 1..120 (step 23), sweeping b_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1201
// Runs the sse2_mul16_ld64_x24 microkernel through the tester for batch sizes
// 1..120 (step 23), sweeping y_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1213
// Runs the sse2_mul16_ld64_x24 microkernel through the tester for batch sizes
// 1..120 (step 23); a_scale starts at 0.1f and is multiplied by 3.14f while
// it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1225
// Runs the sse2_mul16_ld64_x24 microkernel through the tester for batch sizes
// 1..120 (step 23); b_scale starts at 0.1f and is multiplied by 3.14f while
// it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1237
// Runs the sse2_mul16_ld64_x24 microkernel through the tester for batch sizes
// 1..120 (step 23); y_scale starts at 0.1f and is multiplied by 3.14f while
// it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1249
// Runs the sse2_mul16_ld64_x24 microkernel through the tester with qmin set
// to 128 for batch sizes 1..120 (step 23).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1259
// Runs the sse2_mul16_ld64_x24 microkernel through the tester with qmax set
// to 128 for batch sizes 1..120 (step 23).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1269 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1270
1271
1272 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the sse2_mul16_ld64_x32 microkernel through the tester with a
// batch_size of exactly 32.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSE2;
  VAddCMicrokernelTester tester;
  tester.batch_size(32).Test(
    xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
    xnn_init_qs8_add_minmax_sse2_params);
}
1279
// Runs the sse2_mul16_ld64_x32 microkernel through the tester for batch sizes
// 64, 96, ..., 288 (multiples of 32 below 320).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1288
// Runs the sse2_mul16_ld64_x32 microkernel through the tester for every batch
// size from 1 through 31.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1297
// Runs the sse2_mul16_ld64_x32 microkernel through the tester for every batch
// size from 33 through 63.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1306
// Runs the sse2_mul16_ld64_x32 microkernel through the tester with
// inplace(true) for batch sizes 1..160 (step 31).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1316
// Runs the sse2_mul16_ld64_x32 microkernel through the tester for batch sizes
// 1..160 (step 31), sweeping a_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1328
// Runs the sse2_mul16_ld64_x32 microkernel through the tester for batch sizes
// 1..160 (step 31), sweeping b_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1340
// Runs the sse2_mul16_ld64_x32 microkernel through the tester for batch sizes
// 1..160 (step 31), sweeping y_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1352
// Runs the sse2_mul16_ld64_x32 microkernel through the tester for batch sizes
// 1..160 (step 31); a_scale starts at 0.1f and is multiplied by 3.14f while
// it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1364
// Runs the sse2_mul16_ld64_x32 microkernel through the tester for batch sizes
// 1..160 (step 31); b_scale starts at 0.1f and is multiplied by 3.14f while
// it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1376
// Runs the sse2_mul16_ld64_x32 microkernel through the tester for batch sizes
// 1..160 (step 31); y_scale starts at 0.1f and is multiplied by 3.14f while
// it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1388
// Runs the sse2_mul16_ld64_x32 microkernel through the tester with qmin set
// to 128 for batch sizes 1..160 (step 31).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1398
// Runs the sse2_mul16_ld64_x32 microkernel through the tester with qmax set
// to 128 for batch sizes 1..160 (step 31).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32,
      xnn_init_qs8_add_minmax_sse2_params);
  }
}
1408 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1409
1410
1411 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the sse41_mul16_ld64_x8 microkernel through the tester with a
// batch_size of exactly 8.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  VAddCMicrokernelTester tester;
  tester.batch_size(8).Test(
    xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
    xnn_init_qs8_add_minmax_sse4_mul16_params);
}
1418
// Runs the sse41_mul16_ld64_x8 microkernel through the tester for batch sizes
// 16, 24, ..., 72 (multiples of 8 below 80).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1427
// Runs the sse41_mul16_ld64_x8 microkernel through the tester for every batch
// size from 1 through 7.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1436
// Runs the sse41_mul16_ld64_x8 microkernel through the tester for every batch
// size from 9 through 15.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1445
// Runs the sse41_mul16_ld64_x8 microkernel through the tester with
// inplace(true) for batch sizes 1..40 (step 7).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1455
// Runs the sse41_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7), sweeping a_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1467
// Runs the sse41_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7), sweeping b_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1479
// Runs the sse41_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7), sweeping y_zero_point over [-128, 127] in steps of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1491
// Runs the sse41_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7); a_scale starts at 0.1f and is multiplied by 3.14f while it
// stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1503
// Runs the sse41_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7); b_scale starts at 0.1f and is multiplied by 3.14f while it
// stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1515
// Runs the sse41_mul16_ld64_x8 microkernel through the tester for batch sizes
// 1..40 (step 7); y_scale starts at 0.1f and is multiplied by 3.14f while it
// stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1527
// Runs the sse41_mul16_ld64_x8 microkernel through the tester with qmin set
// to 128 for batch sizes 1..40 (step 7).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1537
// Runs the sse41_mul16_ld64_x8 microkernel through the tester with qmax set
// to 128 for batch sizes 1..40 (step 7).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1547 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1548
1549
1550 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the sse41_mul16_ld64_x16 microkernel through the tester with a
// batch_size of exactly 16.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  VAddCMicrokernelTester tester;
  tester.batch_size(16).Test(
    xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
    xnn_init_qs8_add_minmax_sse4_mul16_params);
}
1557
// Runs the sse41_mul16_ld64_x16 microkernel through the tester for batch
// sizes 32, 48, ..., 144 (multiples of 16 below 160).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1566
// Runs the sse41_mul16_ld64_x16 microkernel through the tester for every
// batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1575
// Runs the sse41_mul16_ld64_x16 microkernel through the tester for every
// batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1584
// Runs the sse41_mul16_ld64_x16 microkernel through the tester with
// inplace(true) for batch sizes 1..80 (step 15).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1594
// Varies a_zero_point over -128, -77, ..., 127 for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1606
// Varies b_zero_point over -128, -77, ..., 127 for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1618
// Varies y_zero_point over -128, -77, ..., 127 for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1630
// Varies a_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1642
// Varies b_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1654
// Varies y_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1666
// Runs with qmin(128) for batch sizes 1, 16, ..., 76 (step 15, up to 80).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 15;
  }
}
1676
// Runs with qmax(128) for batch sizes 1, 16, ..., 76 (step 15, up to 80).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 15;
  }
}
1686 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1687
1688
1689 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the sse41_mul16_ld64_x24 microkernel with batch size 24.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kBatchSize = 24;
  VAddCMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
}
1696
// Sweeps batch sizes 48, 72, ..., 216 (step 24, stopping before 240).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 48;
  while (n < 240) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 24;
  }
}
1705
// Sweeps every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n < 24) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    ++n;
  }
}
1714
// Sweeps every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 25;
  while (n < 48) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    ++n;
  }
}
1723
// Runs with inplace(true) for batch sizes 1, 24, ..., 116 (step 23, up to 120).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 23;
  }
}
1733
// Varies a_zero_point over -128, -77, ..., 127 for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1745
// Varies b_zero_point over -128, -77, ..., 127 for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1757
// Varies y_zero_point over -128, -77, ..., 127 for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1769
// Varies a_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1781
// Varies b_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1793
// Varies y_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1805
// Runs with qmin(128) for batch sizes 1, 24, ..., 116 (step 23, up to 120).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, qmin) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 23;
  }
}
1815
// Runs with qmax(128) for batch sizes 1, 24, ..., 116 (step 23, up to 120).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, qmax) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 23;
  }
}
1825 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1826
1827
1828 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the sse41_mul16_ld64_x32 microkernel with batch size 32.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSE41;
  const size_t kBatchSize = 32;
  VAddCMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
}
1835
// Sweeps batch sizes 64, 96, ..., 288 (step 32, stopping before 320).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 64;
  while (n < 320) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 32;
  }
}
1844
// Sweeps every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n < 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    ++n;
  }
}
1853
// Sweeps every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 33;
  while (n < 64) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    ++n;
  }
}
1862
// Runs with inplace(true) for batch sizes 1, 32, ..., 156 (step 31, up to 160).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, inplace) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 31;
  }
}
1872
// Varies a_zero_point over -128, -77, ..., 127 for batch sizes 1, 32, ..., 156.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1884
// Varies b_zero_point over -128, -77, ..., 127 for batch sizes 1, 32, ..., 156.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1896
// Varies y_zero_point over -128, -77, ..., 127 for batch sizes 1, 32, ..., 156.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1908
// Varies a_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 32, ..., 156.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1920
// Varies b_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 32, ..., 156.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1932
// Varies y_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 32, ..., 156.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
1944
// Runs with qmin(128) for batch sizes 1, 32, ..., 156 (step 31, up to 160).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, qmin) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 31;
  }
}
1954
// Runs with qmax(128) for batch sizes 1, 32, ..., 156 (step 31, up to 160).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, qmax) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 31;
  }
}
1964 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1965
1966
1967 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the avx_mul16_ld64_x8 microkernel with batch size 8.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  const size_t kBatchSize = 8;
  VAddCMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
}
1974
// Sweeps batch sizes 16, 24, ..., 72 (step 8, stopping before 80).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 16;
  while (n < 80) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 8;
  }
}
1983
// Sweeps every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n < 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    ++n;
  }
}
1992
// Sweeps every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 9;
  while (n < 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    ++n;
  }
}
2001
// Runs with inplace(true) for batch sizes 1, 8, ..., 36 (step 7, up to 40).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
2011
// Varies a_zero_point over -128, -77, ..., 127 for batch sizes 1, 8, ..., 36.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2023
// Varies b_zero_point over -128, -77, ..., 127 for batch sizes 1, 8, ..., 36.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2035
// Varies y_zero_point over -128, -77, ..., 127 for batch sizes 1, 8, ..., 36.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2047
// Varies a_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 8, ..., 36.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2059
// Varies b_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 8, ..., 36.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2071
// Varies y_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 8, ..., 36.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2083
// Runs with qmin(128) for batch sizes 1, 8, ..., 36 (step 7, up to 40).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
2093
// Runs with qmax(128) for batch sizes 1, 8, ..., 36 (step 7, up to 40).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 7;
  }
}
2103 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2104
2105
2106 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the avx_mul16_ld64_x16 microkernel with batch size 16.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  const size_t kBatchSize = 16;
  VAddCMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2113
// Sweeps batch sizes 32, 48, ..., 144 (step 16, stopping before 160).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 32;
  while (n < 160) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 16;
  }
}
2122
// Sweeps every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n < 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    ++n;
  }
}
2131
// Sweeps every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 17;
  while (n < 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    ++n;
  }
}
2140
// Runs with inplace(true) for batch sizes 1, 16, ..., 76 (step 15, up to 80).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 15;
  }
}
2150
// Varies a_zero_point over -128, -77, ..., 127 for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2162
// Varies b_zero_point over -128, -77, ..., 127 for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2174
// Varies y_zero_point over -128, -77, ..., 127 for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2186
// Varies a_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2198
// Varies b_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2210
// Varies y_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 16, ..., 76.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2222
// Runs with qmin(128) for batch sizes 1, 16, ..., 76 (step 15, up to 80).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 15;
  }
}
2232
// Runs with qmax(128) for batch sizes 1, 16, ..., 76 (step 15, up to 80).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 15;
  }
}
2242 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2243
2244
2245 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the avx_mul16_ld64_x24 microkernel with batch size 24.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  const size_t kBatchSize = 24;
  VAddCMicrokernelTester().batch_size(kBatchSize).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2252
// Sweeps batch sizes 48, 72, ..., 216 (step 24, stopping before 240).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 48;
  while (n < 240) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 24;
  }
}
2261
// Sweeps every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n < 24) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    ++n;
  }
}
2270
// Sweeps every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 25;
  while (n < 48) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    ++n;
  }
}
2279
// Runs with inplace(true) for batch sizes 1, 24, ..., 116 (step 23, up to 120).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
    n += 23;
  }
}
2289
// Varies a_zero_point over -128, -77, ..., 127 for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2301
// Varies b_zero_point over -128, -77, ..., 127 for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2313
// Varies y_zero_point over -128, -77, ..., 127 for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2325
// Varies a_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2337
// Varies b_scale from 0.1f, multiplying by 3.14f while <= 10.0f, for batch sizes 1, 24, ..., 116.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2349
// For batch sizes starting at 1 and stepping by 23 (at most 120), varies
// y_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2361
// Sweeps batch sizes starting at 1 and stepping by 23 (at most 120) with the
// tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2371
// Sweeps batch sizes starting at 1 and stepping by 23 (at most 120) with the
// tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2381 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2382
2383
2384 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once on a batch of exactly 32 elements.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  VAddCMicrokernelTester().batch_size(32).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
      xnn_init_qs8_add_minmax_sse4_mul16_params);
}
2391
// Sweeps batch sizes from 64 up to (but excluding) 320 in increments of 32.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2400
// Sweeps every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2409
// Sweeps every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2418
// Sweeps batch sizes starting at 1 and stepping by 31 (at most 160) with the
// tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2428
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// a_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2440
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// b_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2452
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// y_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2464
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// a_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2476
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// b_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2488
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// y_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
          xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
}
2500
// Sweeps batch sizes starting at 1 and stepping by 31 (at most 160) with the
// tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2510
// Sweeps batch sizes starting at 1 and stepping by 31 (at most 160) with the
// tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32,
        xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
2520 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2521
2522
2523 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once on a batch of exactly 8 elements.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  VAddCMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
}
2530
// Sweeps batch sizes from 16 up to (but excluding) 80 in increments of 8.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2539
// Sweeps every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2548
// Sweeps every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2557
// Sweeps batch sizes starting at 1 and stepping by 7 (at most 40) with the
// tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2567
// For batch sizes starting at 1 and stepping by 7 (at most 40), varies
// a_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2579
// For batch sizes starting at 1 and stepping by 7 (at most 40), varies
// b_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2591
// For batch sizes starting at 1 and stepping by 7 (at most 40), varies
// y_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2603
// For batch sizes starting at 1 and stepping by 7 (at most 40), varies
// a_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2615
// For batch sizes starting at 1 and stepping by 7 (at most 40), varies
// b_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2627
// For batch sizes starting at 1 and stepping by 7 (at most 40), varies
// y_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2639
// Sweeps batch sizes starting at 1 and stepping by 7 (at most 40) with the
// tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2649
// Sweeps batch sizes starting at 1 and stepping by 7 (at most 40) with the
// tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2659 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2660
2661
2662 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once on a batch of exactly 16 elements.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
}
2669
// Sweeps batch sizes from 32 up to (but excluding) 160 in increments of 16.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2678
// Sweeps every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2687
// Sweeps every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2696
// Sweeps batch sizes starting at 1 and stepping by 15 (at most 80) with the
// tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2706
// For batch sizes starting at 1 and stepping by 15 (at most 80), varies
// a_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2718
// For batch sizes starting at 1 and stepping by 15 (at most 80), varies
// b_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2730
// For batch sizes starting at 1 and stepping by 15 (at most 80), varies
// y_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2742
// For batch sizes starting at 1 and stepping by 15 (at most 80), varies
// a_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2754
// For batch sizes starting at 1 and stepping by 15 (at most 80), varies
// b_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2766
// For batch sizes starting at 1 and stepping by 15 (at most 80), varies
// y_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2778
// Sweeps batch sizes starting at 1 and stepping by 15 (at most 80) with the
// tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2788
// Sweeps batch sizes starting at 1 and stepping by 15 (at most 80) with the
// tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2798 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2799
2800
2801 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once on a batch of exactly 24 elements.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  VAddCMicrokernelTester().batch_size(24).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
}
2808
// Sweeps batch sizes from 48 up to (but excluding) 240 in increments of 24.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 48; n < 240; n += 24) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2817
// Sweeps every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 24; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2826
// Sweeps every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 25; n < 48; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2835
// Sweeps batch sizes starting at 1 and stepping by 23 (at most 120) with the
// tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2845
// For batch sizes starting at 1 and stepping by 23 (at most 120), varies
// a_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2857
// For batch sizes starting at 1 and stepping by 23 (at most 120), varies
// b_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2869
// For batch sizes starting at 1 and stepping by 23 (at most 120), varies
// y_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2881
// For batch sizes starting at 1 and stepping by 23 (at most 120), varies
// a_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2893
// For batch sizes starting at 1 and stepping by 23 (at most 120), varies
// b_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2905
// For batch sizes starting at 1 and stepping by 23 (at most 120), varies
// y_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2917
// Sweeps batch sizes starting at 1 and stepping by 23 (at most 120) with the
// tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2927
// Sweeps batch sizes starting at 1 and stepping by 23 (at most 120) with the
// tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2937 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2938
2939
2940 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once on a batch of exactly 32 elements.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSE41;
  VAddCMicrokernelTester().batch_size(32).Test(
      xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
}
2947
// Sweeps batch sizes from 64 up to (but excluding) 320 in increments of 32.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2956
// Sweeps every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2965
// Sweeps every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2974
// Sweeps batch sizes starting at 1 and stepping by 31 (at most 160) with the
// tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2984
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// a_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
2996
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// b_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3008
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// y_zero_point from -128 to 127 in increments of 51.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3020
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// a_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3032
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// b_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3044
// For batch sizes starting at 1 and stepping by 31 (at most 160), varies
// y_scale geometrically: starts at 0.1f and multiplies by 3.14f while the
// value stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
          xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3056
// Sweeps batch sizes starting at 1 and stepping by 31 (at most 160) with the
// tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3066
// Sweeps batch sizes starting at 1 and stepping by 31 (at most 160) with the
// tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3076 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3077
3078
3079 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the tester once on a batch of exactly 8 elements.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VAddCMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8,
      xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3086
// Sweeps batch sizes from 16 up to (but excluding) 80 in increments of 8.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3095
// Sweeps every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8,
        xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3104
// Covers every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  const size_t kBase = 8;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3113
// Runs with inplace(true) for batch sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3123
// Sweeps a_zero_point over -128..127 in steps of 51, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3135
// Sweeps b_zero_point over -128..127 in steps of 51, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3147
// Sweeps y_zero_point over -128..127 in steps of 51, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3159
// Sweeps a_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3171
// Sweeps b_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3183
// Sweeps y_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3195
// Runs with qmin set to 128 for batch sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3205
// Runs with qmax set to 128 for batch sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3215 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3216
3217
3218 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 16.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  const size_t kBatch = 16;
  VAddCMicrokernelTester tester;
  tester.batch_size(kBatch)
    .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3225
// Covers batch sizes that are multiples of 16, from 32 up to (excluding) 160.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  const size_t kStep = 16;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3234
// Covers every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  const size_t kUpperBound = 16;
  for (size_t n = 1; n < kUpperBound; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3243
// Covers every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  const size_t kBase = 16;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3252
// Runs with inplace(true) for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3262
// Sweeps a_zero_point over -128..127 in steps of 51, for batch sizes 1..80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3274
// Sweeps b_zero_point over -128..127 in steps of 51, for batch sizes 1..80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3286
// Sweeps y_zero_point over -128..127 in steps of 51, for batch sizes 1..80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3298
// Sweeps a_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3310
// Sweeps b_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3322
// Sweeps y_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3334
// Runs with qmin set to 128 for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3344
// Runs with qmax set to 128 for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3354 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3355
3356
3357 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 24.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  const size_t kBatch = 24;
  VAddCMicrokernelTester tester;
  tester.batch_size(kBatch)
    .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3364
// Covers batch sizes that are multiples of 24, from 48 up to (excluding) 240.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  const size_t kStep = 24;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3373
// Covers every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  const size_t kUpperBound = 24;
  for (size_t n = 1; n < kUpperBound; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3382
// Covers every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  const size_t kBase = 24;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3391
// Runs with inplace(true) for batch sizes from 1 up to 120 in steps of 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 120;
  const size_t kBatchStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3401
// Sweeps a_zero_point over -128..127 in steps of 51, for batch sizes 1..120 in steps of 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 120;
  const size_t kBatchStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3413
// Sweeps b_zero_point over -128..127 in steps of 51, for batch sizes 1..120 in steps of 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 120;
  const size_t kBatchStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3425
// Sweeps y_zero_point over -128..127 in steps of 51, for batch sizes 1..120 in steps of 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 120;
  const size_t kBatchStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3437
// Sweeps a_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..120 in steps of 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 120;
  const size_t kBatchStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3449
// Sweeps b_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..120 in steps of 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 120;
  const size_t kBatchStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3461
// Sweeps y_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..120 in steps of 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 120;
  const size_t kBatchStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3473
// Runs with qmin set to 128 for batch sizes from 1 up to 120 in steps of 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 120;
  const size_t kBatchStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3483
// Runs with qmax set to 128 for batch sizes from 1 up to 120 in steps of 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 120;
  const size_t kBatchStride = 23;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3493 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3494
3495
3496 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 32.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  const size_t kBatch = 32;
  VAddCMicrokernelTester tester;
  tester.batch_size(kBatch)
    .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3503
// Covers batch sizes that are multiples of 32, from 64 up to (excluding) 320.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  const size_t kStep = 32;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3512
// Covers every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  const size_t kUpperBound = 32;
  for (size_t n = 1; n < kUpperBound; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3521
// Covers every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  const size_t kBase = 32;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3530
// Runs with inplace(true) for batch sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 160;
  const size_t kBatchStride = 31;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3540
// Sweeps a_zero_point over -128..127 in steps of 51, for batch sizes 1..160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 160;
  const size_t kBatchStride = 31;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3552
// Sweeps b_zero_point over -128..127 in steps of 51, for batch sizes 1..160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 160;
  const size_t kBatchStride = 31;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3564
// Sweeps y_zero_point over -128..127 in steps of 51, for batch sizes 1..160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 160;
  const size_t kBatchStride = 31;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3576
// Sweeps a_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 160;
  const size_t kBatchStride = 31;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3588
// Sweeps b_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 160;
  const size_t kBatchStride = 31;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3600
// Sweeps y_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 160;
  const size_t kBatchStride = 31;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3612
// Runs with qmin set to 128 for batch sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 160;
  const size_t kBatchStride = 31;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3622
// Runs with qmax set to 128 for batch sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_AVX;
  const size_t kMaxBatch = 160;
  const size_t kBatchStride = 31;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3632 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3633
3634
3635 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 8.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_XOP;
  const size_t kBatch = 8;
  VAddCMicrokernelTester tester;
  tester.batch_size(kBatch)
    .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3642
// Covers batch sizes that are multiples of 8, from 16 up to (excluding) 80.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_XOP;
  const size_t kStep = 8;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3651
// Covers every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_XOP;
  const size_t kUpperBound = 8;
  for (size_t n = 1; n < kUpperBound; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3660
// Covers every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_XOP;
  const size_t kBase = 8;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3669
// Runs with inplace(true) for batch sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, inplace) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3679
// Sweeps a_zero_point over -128..127 in steps of 51, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3691
// Sweeps b_zero_point over -128..127 in steps of 51, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3703
// Sweeps y_zero_point over -128..127 in steps of 51, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3715
// Sweeps a_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3727
// Sweeps b_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3739
// Sweeps y_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..40 in steps of 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3751
// Runs with qmin set to 128 for batch sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3761
// Runs with qmax set to 128 for batch sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 40;
  const size_t kBatchStride = 7;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3771 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3772
3773
3774 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 16.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_XOP;
  const size_t kBatch = 16;
  VAddCMicrokernelTester tester;
  tester.batch_size(kBatch)
    .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3781
// Covers batch sizes that are multiples of 16, from 32 up to (excluding) 160.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_XOP;
  const size_t kStep = 16;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3790
// Covers every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_XOP;
  const size_t kUpperBound = 16;
  for (size_t n = 1; n < kUpperBound; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3799
// Covers every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_XOP;
  const size_t kBase = 16;
  for (size_t n = kBase + 1; n < 2 * kBase; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3808
// Runs with inplace(true) for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, inplace) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3818
// Sweeps a_zero_point over -128..127 in steps of 51, for batch sizes 1..80 in steps of 15.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3830
// Sweeps b_zero_point over -128..127 in steps of 51, for batch sizes 1..80 in steps of 15.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3842
// Sweeps y_zero_point over -128..127 in steps of 51, for batch sizes 1..80 in steps of 15.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3854
// Sweeps a_scale from 0.1, multiplying by 3.14 while <= 10, for batch sizes 1..80 in steps of 15.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_XOP;
  const size_t kMaxBatch = 80;
  const size_t kBatchStride = 15;
  for (size_t n = 1; n <= kMaxBatch; n += kBatchStride) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3866
// Sweeps b_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 16, 31, ... (<= 80) on the xop_mul32_ld32_x16 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
3878
// Sweeps y_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 16, 31, ... (<= 80) on the xop_mul32_ld32_x16 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
3890
// Runs the xop_mul32_ld32_x16 ukernel with qmin(128) for batch sizes
// 1, 16, 31, ... (<= 80).
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(ukernel, init_params);
  }
}
3900
// Runs the xop_mul32_ld32_x16 ukernel with qmax(128) for batch sizes
// 1, 16, 31, ... (<= 80).
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(ukernel, init_params);
  }
}
3910 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3911
3912
3913 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the xop_mul32_ld32_x24 ukernel once with batch_size exactly 24.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_XOP;
  constexpr size_t kBatch = 24;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  VAddCMicrokernelTester()
    .batch_size(kBatch)
    .Test(ukernel, init_params);
}
3920
// Runs the xop_mul32_ld32_x24 ukernel for batch sizes that are multiples of 24,
// from 48 up to (but excluding) 240.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 48; n < 240; n += 24) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
3929
// Runs the xop_mul32_ld32_x24 ukernel for every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n < 24; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
3938
// Runs the xop_mul32_ld32_x24 ukernel for every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 25; n < 48; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
3947
// Runs the xop_mul32_ld32_x24 ukernel with inplace(true) for batch sizes
// 1, 24, 47, ... (<= 120).
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, inplace) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, init_params);
  }
}
3957
// Sweeps a_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 24, 47, ... (<= 120) on the xop_mul32_ld32_x24 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
3969
// Sweeps b_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 24, 47, ... (<= 120) on the xop_mul32_ld32_x24 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
3981
// Sweeps y_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 24, 47, ... (<= 120) on the xop_mul32_ld32_x24 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
3993
// Sweeps a_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 24, 47, ... (<= 120) on the xop_mul32_ld32_x24 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4005
// Sweeps b_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 24, 47, ... (<= 120) on the xop_mul32_ld32_x24 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4017
// Sweeps y_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 24, 47, ... (<= 120) on the xop_mul32_ld32_x24 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4029
// Runs the xop_mul32_ld32_x24 ukernel with qmin(128) for batch sizes
// 1, 24, 47, ... (<= 120).
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(ukernel, init_params);
  }
}
4039
// Runs the xop_mul32_ld32_x24 ukernel with qmax(128) for batch sizes
// 1, 24, 47, ... (<= 120).
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(ukernel, init_params);
  }
}
4049 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4050
4051
4052 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the xop_mul32_ld32_x32 ukernel once with batch_size exactly 32.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_XOP;
  constexpr size_t kBatch = 32;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  VAddCMicrokernelTester()
    .batch_size(kBatch)
    .Test(ukernel, init_params);
}
4059
// Runs the xop_mul32_ld32_x32 ukernel for batch sizes that are multiples of 32,
// from 64 up to (but excluding) 320.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4068
// Runs the xop_mul32_ld32_x32 ukernel for every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4077
// Runs the xop_mul32_ld32_x32 ukernel for every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4086
// Runs the xop_mul32_ld32_x32 ukernel with inplace(true) for batch sizes
// 1, 32, 63, ... (<= 160).
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, inplace) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, init_params);
  }
}
4096
// Sweeps a_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 32, 63, ... (<= 160) on the xop_mul32_ld32_x32 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4108
// Sweeps b_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 32, 63, ... (<= 160) on the xop_mul32_ld32_x32 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4120
// Sweeps y_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 32, 63, ... (<= 160) on the xop_mul32_ld32_x32 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4132
// Sweeps a_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 32, 63, ... (<= 160) on the xop_mul32_ld32_x32 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4144
// Sweeps b_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 32, 63, ... (<= 160) on the xop_mul32_ld32_x32 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4156
// Sweeps y_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 32, 63, ... (<= 160) on the xop_mul32_ld32_x32 ukernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4168
// Runs the xop_mul32_ld32_x32 ukernel with qmin(128) for batch sizes
// 1, 32, 63, ... (<= 160).
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(ukernel, init_params);
  }
}
4178
// Runs the xop_mul32_ld32_x32 ukernel with qmax(128) for batch sizes
// 1, 32, 63, ... (<= 160).
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_XOP;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul32_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(ukernel, init_params);
  }
}
4188 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4189
4190
4191 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the avx2_mul32_ld64_x8 ukernel once with batch_size exactly 8.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  constexpr size_t kBatch = 8;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  VAddCMicrokernelTester()
    .batch_size(kBatch)
    .Test(ukernel, init_params);
}
4198
// Runs the avx2_mul32_ld64_x8 ukernel for batch sizes that are multiples of 8,
// from 16 up to (but excluding) 80.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4207
// Runs the avx2_mul32_ld64_x8 ukernel for every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4216
// Runs the avx2_mul32_ld64_x8 ukernel for every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4225
// Runs the avx2_mul32_ld64_x8 ukernel with inplace(true) for batch sizes
// 1, 8, 15, ... (<= 40).
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, init_params);
  }
}
4235
// Sweeps a_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 8, 15, ... (<= 40) on the avx2_mul32_ld64_x8 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4247
// Sweeps b_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 8, 15, ... (<= 40) on the avx2_mul32_ld64_x8 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4259
// Sweeps y_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 8, 15, ... (<= 40) on the avx2_mul32_ld64_x8 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4271
// Sweeps a_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 8, 15, ... (<= 40) on the avx2_mul32_ld64_x8 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4283
// Sweeps b_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 8, 15, ... (<= 40) on the avx2_mul32_ld64_x8 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4295
// Sweeps y_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 8, 15, ... (<= 40) on the avx2_mul32_ld64_x8 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4307
// Runs the avx2_mul32_ld64_x8 ukernel with qmin(128) for batch sizes
// 1, 8, 15, ... (<= 40).
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(ukernel, init_params);
  }
}
4317
// Runs the avx2_mul32_ld64_x8 ukernel with qmax(128) for batch sizes
// 1, 8, 15, ... (<= 40).
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(ukernel, init_params);
  }
}
4327 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4328
4329
4330 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the avx2_mul32_ld64_x16 ukernel once with batch_size exactly 16.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  constexpr size_t kBatch = 16;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  VAddCMicrokernelTester()
    .batch_size(kBatch)
    .Test(ukernel, init_params);
}
4337
// Runs the avx2_mul32_ld64_x16 ukernel for batch sizes that are multiples of
// 16, from 32 up to (but excluding) 160.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4346
// Runs the avx2_mul32_ld64_x16 ukernel for every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4355
// Runs the avx2_mul32_ld64_x16 ukernel for every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4364
// Runs the avx2_mul32_ld64_x16 ukernel with inplace(true) for batch sizes
// 1, 16, 31, ... (<= 80).
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, init_params);
  }
}
4374
// Sweeps a_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 16, 31, ... (<= 80) on the avx2_mul32_ld64_x16 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4386
// Sweeps b_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 16, 31, ... (<= 80) on the avx2_mul32_ld64_x16 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4398
// Sweeps y_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 16, 31, ... (<= 80) on the avx2_mul32_ld64_x16 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4410
// Sweeps a_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 16, 31, ... (<= 80) on the avx2_mul32_ld64_x16 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4422
// Sweeps b_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 16, 31, ... (<= 80) on the avx2_mul32_ld64_x16 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4434
// Sweeps y_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 16, 31, ... (<= 80) on the avx2_mul32_ld64_x16 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4446
// Runs the avx2_mul32_ld64_x16 ukernel with qmin(128) for batch sizes
// 1, 16, 31, ... (<= 80).
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(ukernel, init_params);
  }
}
4456
// Runs the avx2_mul32_ld64_x16 ukernel with qmax(128) for batch sizes
// 1, 16, 31, ... (<= 80).
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(ukernel, init_params);
  }
}
4466 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4467
4468
4469 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the avx2_mul32_ld64_x24 ukernel once with batch_size exactly 24.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  constexpr size_t kBatch = 24;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  VAddCMicrokernelTester()
    .batch_size(kBatch)
    .Test(ukernel, init_params);
}
4476
// Runs the avx2_mul32_ld64_x24 ukernel for batch sizes that are multiples of
// 24, from 48 up to (but excluding) 240.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 48; n < 240; n += 24) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4485
// Runs the avx2_mul32_ld64_x24 ukernel for every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n < 24; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4494
// Runs the avx2_mul32_ld64_x24 ukernel for every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 25; n < 48; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4503
// Runs the avx2_mul32_ld64_x24 ukernel with inplace(true) for batch sizes
// 1, 24, 47, ... (<= 120).
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, init_params);
  }
}
4513
// Sweeps a_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 24, 47, ... (<= 120) on the avx2_mul32_ld64_x24 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4525
// Sweeps b_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 24, 47, ... (<= 120) on the avx2_mul32_ld64_x24 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4537
// Sweeps y_zero_point over [-128, 127] in steps of 51 for batch sizes
// 1, 24, 47, ... (<= 120) on the avx2_mul32_ld64_x24 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zp)
        .Test(ukernel, init_params);
    }
  }
}
4549
// Sweeps a_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 24, 47, ... (<= 120) on the avx2_mul32_ld64_x24 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4561
// Sweeps b_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 24, 47, ... (<= 120) on the avx2_mul32_ld64_x24 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4573
// Sweeps y_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch
// sizes 1, 24, 47, ... (<= 120) on the avx2_mul32_ld64_x24 ukernel.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(ukernel, init_params);
    }
  }
}
4585
// Runs the avx2_mul32_ld64_x24 ukernel with qmin(128) for batch sizes
// 1, 24, 47, ... (<= 120).
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, qmin) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(ukernel, init_params);
  }
}
4595
// Runs the avx2_mul32_ld64_x24 ukernel with qmax(128) for batch sizes
// 1, 24, 47, ... (<= 120).
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, qmax) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(ukernel, init_params);
  }
}
4605 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4606
4607
4608 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the avx2_mul32_ld64_x32 ukernel once with batch_size exactly 32.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  constexpr size_t kBatch = 32;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  VAddCMicrokernelTester()
    .batch_size(kBatch)
    .Test(ukernel, init_params);
}
4615
// Runs the avx2_mul32_ld64_x32 ukernel for batch sizes that are multiples of
// 32, from 64 up to (but excluding) 320.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_avx2_params;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, init_params);
  }
}
4624
// Covers every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
      xnn_init_qs8_add_minmax_avx2_params);
  }
}
4633
// Covers every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
      xnn_init_qs8_add_minmax_avx2_params);
  }
}
4642
// Enables the tester's inplace option for batch sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
      xnn_init_qs8_add_minmax_avx2_params);
  }
}
4652
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
        xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4664
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
        xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4676
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
        xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4688
// Varies a_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
        xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4700
// Varies b_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
        xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4712
// Varies y_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
        xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4724
// Sets qmin to 128 on the tester for batch sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
      xnn_init_qs8_add_minmax_avx2_params);
  }
}
4734
// Sets qmax to 128 on the tester for batch sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32,
      xnn_init_qs8_add_minmax_avx2_params);
  }
}
4744 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4745
4746
4747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 16 elements.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  VAddCMicrokernelTester().batch_size(16).Test(
    xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
    xnn_init_qs8_add_minmax_avx512_params);
}
4754
// Covers batch sizes from 32 up to (but excluding) 160 in increments of 16.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
4763
// Covers every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
4772
// Covers every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
4781
// Enables the tester's inplace option for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
4791
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4803
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4815
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4827
// Varies a_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4839
// Varies b_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4851
// Varies y_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4863
// Sets qmin to 128 on the tester for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
4873
// Sets qmax to 128 on the tester for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
4883 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4884
4885
4886 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 32 elements.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  VAddCMicrokernelTester().batch_size(32).Test(
    xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
    xnn_init_qs8_add_minmax_avx512_params);
}
4893
// Covers batch sizes from 64 up to (but excluding) 320 in increments of 32.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
4902
// Covers every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
4911
// Covers every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
4920
// Enables the tester's inplace option for batch sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
4930
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4942
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4954
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4966
// Varies a_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4978
// Varies b_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4990
// Varies y_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
5002
// Sets qmin to 128 on the tester for batch sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
5012
// Sets qmax to 128 on the tester for batch sizes from 1 up to 160 in steps of 31.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
      xnn_init_qs8_add_minmax_avx512_params);
  }
}
5022 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5023
5024
5025 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once on a batch of exactly 8 elements.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_eq_8) {
  VAddCMicrokernelTester().batch_size(8).Test(
    xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
    xnn_init_qs8_add_minmax_wasmsimd_params);
}
5031
// Covers batch sizes from 16 up to (but excluding) 80 in increments of 8.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_div_8) {
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5039
// Covers every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_lt_8) {
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5047
// Covers every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_gt_8) {
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5055
// Enables the tester's inplace option for batch sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, inplace) {
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5064
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, a_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5075
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, b_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5086
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, y_zero_point) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5097
// Varies a_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, a_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5108
// Varies b_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, b_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5119
// Varies y_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, y_scale) {
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5130
// Sets qmin to 128 on the tester for batch sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, qmin) {
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5139
// Sets qmax to 128 on the tester for batch sizes from 1 up to 40 in steps of 7.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, qmax) {
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5148 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5149
5150
5151 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once on a batch of exactly 16 elements.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_eq_16) {
  VAddCMicrokernelTester().batch_size(16).Test(
    xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
    xnn_init_qs8_add_minmax_wasmsimd_params);
}
5157
// Covers batch sizes from 32 up to (but excluding) 160 in increments of 16.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_div_16) {
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5165
// Covers every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_lt_16) {
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5173
// Covers every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_gt_16) {
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5181
// Enables the tester's inplace option for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, inplace) {
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5190
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, a_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5201
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, b_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5212
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, y_zero_point) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5223
// Varies a_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, a_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5234
// Varies b_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, b_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5245
// Varies y_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, y_scale) {
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5256
// Sets qmin to 128 on the tester for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, qmin) {
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5265
// Sets qmax to 128 on the tester for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, qmax) {
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5274 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5275
5276
5277 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the kernel once on a batch of exactly 24 elements.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_eq_24) {
  VAddCMicrokernelTester().batch_size(24).Test(
    xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
    xnn_init_qs8_add_minmax_wasmsimd_params);
}
5283
// Covers batch sizes from 48 up to (but excluding) 240 in increments of 24.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_div_24) {
  for (size_t n = 48; n < 240; n += 24) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5291
// Covers every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_lt_24) {
  for (size_t n = 1; n < 24; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5299
// Covers every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_gt_24) {
  for (size_t n = 25; n < 48; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5307
// Enables the tester's inplace option for batch sizes from 1 up to 120 in steps of 23.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, inplace) {
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
      xnn_init_qs8_add_minmax_wasmsimd_params);
  }
}
5316
// Varies a_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 120 in steps of 23.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, a_zero_point) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5327
// Varies b_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 120 in steps of 23.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, b_zero_point) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5338
// Varies y_zero_point from -128 to 127 in steps of 51 for batch sizes from 1
// up to 120 in steps of 23.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, y_zero_point) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5349
// Varies a_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 120 in steps of 23.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, a_scale) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5360
// Varies b_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 120 in steps of 23.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, b_scale) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5371
// Varies y_scale (0.1, multiplied by 3.14 each step, while <= 10) for batch
// sizes from 1 up to 120 in steps of 23.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, y_scale) {
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24,
        xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
}
5382
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, qmin) {
  // Batch sizes 1, 24, ..., 116 with the tester's qmin set to 128.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
5391
TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, qmax) {
  // Batch sizes 1, 24, ..., 116 with the tester's qmax set to 128.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
5400 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5401
5402
5403 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_eq_32) {
  // Single run with a batch of exactly 32 elements.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  VAddCMicrokernelTester().batch_size(32).Test(ukernel, init_params);
}
5409
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_div_32) {
  // Multiples of 32 from 64 up to (but excluding) 320.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5417
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_lt_32) {
  // Every batch size from 1 through 31.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5425
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_gt_32) {
  // Every batch size from 33 through 63.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5433
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, inplace) {
  // Batch sizes 1, 32, ..., 156 with the tester's inplace flag enabled.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
5442
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, a_zero_point) {
  // Batch sizes 1, 32, ..., 156 crossed with a zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5453
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, b_zero_point) {
  // Batch sizes 1, 32, ..., 156 crossed with b zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5464
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, y_zero_point) {
  // Batch sizes 1, 32, ..., 156 crossed with y zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5475
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, a_scale) {
  // Batch sizes 1, 32, ..., 156; a_scale starts at 0.1 and is multiplied by
  // 3.14 for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
5486
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, b_scale) {
  // Batch sizes 1, 32, ..., 156; b_scale starts at 0.1 and is multiplied by
  // 3.14 for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
5497
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, y_scale) {
  // Batch sizes 1, 32, ..., 156; y_scale starts at 0.1 and is multiplied by
  // 3.14 for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
5508
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, qmin) {
  // Batch sizes 1, 32, ..., 156 with the tester's qmin set to 128.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
5517
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, qmax) {
  // Batch sizes 1, 32, ..., 156 with the tester's qmax set to 128.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
5526 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5527
5528
TEST(QS8_VADDC_MINMAX__SCALAR_X1, batch_eq_1) {
  // Single run with a batch of exactly 1 element.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  VAddCMicrokernelTester().batch_size(1).Test(ukernel, init_params);
}
5534
TEST(QS8_VADDC_MINMAX__SCALAR_X1, batch_gt_1) {
  // Every batch size from 2 through 9.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 2; n < 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5542
TEST(QS8_VADDC_MINMAX__SCALAR_X1, inplace) {
  // Batch sizes 1 through 5 with the tester's inplace flag enabled.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
5551
TEST(QS8_VADDC_MINMAX__SCALAR_X1, a_zero_point) {
  // Batch sizes 1 through 5 crossed with a zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5562
TEST(QS8_VADDC_MINMAX__SCALAR_X1, b_zero_point) {
  // Batch sizes 1 through 5 crossed with b zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5573
TEST(QS8_VADDC_MINMAX__SCALAR_X1, y_zero_point) {
  // Batch sizes 1 through 5 crossed with y zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5584
TEST(QS8_VADDC_MINMAX__SCALAR_X1, a_scale) {
  // Batch sizes 1 through 5; a_scale starts at 0.1 and is multiplied by 3.14
  // for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
5595
TEST(QS8_VADDC_MINMAX__SCALAR_X1, b_scale) {
  // Batch sizes 1 through 5; b_scale starts at 0.1 and is multiplied by 3.14
  // for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
5606
TEST(QS8_VADDC_MINMAX__SCALAR_X1, y_scale) {
  // Batch sizes 1 through 5; y_scale starts at 0.1 and is multiplied by 3.14
  // for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
5617
TEST(QS8_VADDC_MINMAX__SCALAR_X1, qmin) {
  // Batch sizes 1 through 5 with the tester's qmin set to 128.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
5626
TEST(QS8_VADDC_MINMAX__SCALAR_X1, qmax) {
  // Batch sizes 1 through 5 with the tester's qmax set to 128.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
5635
TEST(QS8_VADDC_MINMAX__SCALAR_X2, batch_eq_2) {
  // Single run with a batch of exactly 2 elements.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  VAddCMicrokernelTester().batch_size(2).Test(ukernel, init_params);
}
5641
TEST(QS8_VADDC_MINMAX__SCALAR_X2, batch_div_2) {
  // Even batch sizes from 4 up to (but excluding) 20.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 4; n < 20; n += 2) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5649
TEST(QS8_VADDC_MINMAX__SCALAR_X2, batch_lt_2) {
  // Batch sizes below 2; the loop bounds admit only a batch of 1.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n < 2; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5657
TEST(QS8_VADDC_MINMAX__SCALAR_X2, batch_gt_2) {
  // Batch sizes in [3, 4); the loop bounds admit only a batch of 3.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 3; n < 4; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5665
TEST(QS8_VADDC_MINMAX__SCALAR_X2, inplace) {
  // Batch sizes 1 through 10 with the tester's inplace flag enabled.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
5674
TEST(QS8_VADDC_MINMAX__SCALAR_X2, a_zero_point) {
  // Batch sizes 1 through 10 crossed with a zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5685
TEST(QS8_VADDC_MINMAX__SCALAR_X2, b_zero_point) {
  // Batch sizes 1 through 10 crossed with b zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5696
TEST(QS8_VADDC_MINMAX__SCALAR_X2, y_zero_point) {
  // Batch sizes 1 through 10 crossed with y zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5707
TEST(QS8_VADDC_MINMAX__SCALAR_X2, a_scale) {
  // Batch sizes 1 through 10; a_scale starts at 0.1 and is multiplied by 3.14
  // for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
5718
TEST(QS8_VADDC_MINMAX__SCALAR_X2, b_scale) {
  // Batch sizes 1 through 10; b_scale starts at 0.1 and is multiplied by 3.14
  // for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
5729
TEST(QS8_VADDC_MINMAX__SCALAR_X2, y_scale) {
  // Batch sizes 1 through 10; y_scale starts at 0.1 and is multiplied by 3.14
  // for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
5740
TEST(QS8_VADDC_MINMAX__SCALAR_X2, qmin) {
  // Batch sizes 1 through 10 with the tester's qmin set to 128.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
5749
TEST(QS8_VADDC_MINMAX__SCALAR_X2, qmax) {
  // Batch sizes 1 through 10 with the tester's qmax set to 128.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
5758
TEST(QS8_VADDC_MINMAX__SCALAR_X4, batch_eq_4) {
  // Single run with a batch of exactly 4 elements.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  VAddCMicrokernelTester().batch_size(4).Test(ukernel, init_params);
}
5764
TEST(QS8_VADDC_MINMAX__SCALAR_X4, batch_div_4) {
  // Multiples of 4 from 8 up to (but excluding) 40.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 8; n < 40; n += 4) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5772
TEST(QS8_VADDC_MINMAX__SCALAR_X4, batch_lt_4) {
  // Every batch size from 1 through 3.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n < 4; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5780
TEST(QS8_VADDC_MINMAX__SCALAR_X4, batch_gt_4) {
  // Every batch size from 5 through 7.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 5; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5788
TEST(QS8_VADDC_MINMAX__SCALAR_X4, inplace) {
  // Batch sizes 1, 4, ..., 19 with the tester's inplace flag enabled.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
5797
TEST(QS8_VADDC_MINMAX__SCALAR_X4, a_zero_point) {
  // Batch sizes 1, 4, ..., 19 crossed with a zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5808
TEST(QS8_VADDC_MINMAX__SCALAR_X4, b_zero_point) {
  // Batch sizes 1, 4, ..., 19 crossed with b zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5819
TEST(QS8_VADDC_MINMAX__SCALAR_X4, y_zero_point) {
  // Batch sizes 1, 4, ..., 19 crossed with y zero points -128, -77, ..., 127.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5830
TEST(QS8_VADDC_MINMAX__SCALAR_X4, a_scale) {
  // Batch sizes 1, 4, ..., 19; a_scale starts at 0.1 and is multiplied by
  // 3.14 for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
5841
TEST(QS8_VADDC_MINMAX__SCALAR_X4, b_scale) {
  // Batch sizes 1, 4, ..., 19; b_scale starts at 0.1 and is multiplied by
  // 3.14 for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
5852
TEST(QS8_VADDC_MINMAX__SCALAR_X4, y_scale) {
  // Batch sizes 1, 4, ..., 19; y_scale starts at 0.1 and is multiplied by
  // 3.14 for as long as it stays <= 10.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
5863
TEST(QS8_VADDC_MINMAX__SCALAR_X4, qmin) {
  // Batch sizes 1, 4, ..., 19 with the tester's qmin set to 128.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
5872
TEST(QS8_VADDC_MINMAX__SCALAR_X4, qmax) {
  // Batch sizes 1, 4, ..., 19 with the tester's qmax set to 128.
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}