1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/qu8-vaddc-minmax.yaml
8 // Generator: tools/generate-vbinary-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/microparams-init.h>
17 #include <xnnpack/vadd.h>
18 #include "vaddc-microkernel-tester.h"
19
20
21 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Calls the tester once with batch_size fixed at 8.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  VAddCMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
}
28
// Calls the tester for batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
  }
}
37
// Calls the tester for every batch size from 1 through 7.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
  }
}
46
// Calls the tester for every batch size from 9 through 15.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
  }
}
55
// Calls the tester with inplace(true) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
  }
}
65
// For batch sizes 1, 8, ..., 36, sweeps the value passed to a_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
77
// For batch sizes 1, 8, ..., 36, sweeps the value passed to b_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
89
// For batch sizes 1, 8, ..., 36, sweeps the value passed to y_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
101
// For batch sizes 1, 8, ..., 36, sweeps the value passed to a_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
113
// For batch sizes 1, 8, ..., 36, sweeps the value passed to b_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
125
// For batch sizes 1, 8, ..., 36, sweeps the value passed to y_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
137
// Calls the tester with qmin(128) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
  }
}
147
// Calls the tester with qmax(128) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qu8_add_minmax_neon_params);
  }
}
157 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
158
159
160 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Calls the tester once with batch_size fixed at 16.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
}
167
// Calls the tester for batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
176
// Calls the tester for every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
185
// Calls the tester for every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
194
// Calls the tester with inplace(true) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
204
// For batch sizes 1, 16, ..., 76, sweeps the value passed to a_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
216
// For batch sizes 1, 16, ..., 76, sweeps the value passed to b_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
228
// For batch sizes 1, 16, ..., 76, sweeps the value passed to y_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
240
// For batch sizes 1, 16, ..., 76, sweeps the value passed to a_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
252
// For batch sizes 1, 16, ..., 76, sweeps the value passed to b_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
264
// For batch sizes 1, 16, ..., 76, sweeps the value passed to y_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
276
// Calls the tester with qmin(128) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
286
// Calls the tester with qmax(128) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
296 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
297
298
299 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Calls the tester once with batch_size fixed at 32.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  VAddCMicrokernelTester().batch_size(32).Test(
      xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
}
306
// Calls the tester for batch sizes 64, 96, ..., 288 (multiples of 32).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
  }
}
315
// Calls the tester for every batch size from 1 through 31.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
  }
}
324
// Calls the tester for every batch size from 33 through 63.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
  }
}
333
// Calls the tester with inplace(true) for batch sizes 1, 32, ..., 156.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
  }
}
343
// For batch sizes 1, 32, ..., 156, sweeps the value passed to a_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
355
// For batch sizes 1, 32, ..., 156, sweeps the value passed to b_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
367
// For batch sizes 1, 32, ..., 156, sweeps the value passed to y_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
379
// For batch sizes 1, 32, ..., 156, sweeps the value passed to a_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
391
// For batch sizes 1, 32, ..., 156, sweeps the value passed to b_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
403
// For batch sizes 1, 32, ..., 156, sweeps the value passed to y_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
415
// Calls the tester with qmin(128) for batch sizes 1, 32, ..., 156.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
  }
}
425
// Calls the tester with qmax(128) for batch sizes 1, 32, ..., 156.
TEST(QU8_VADDC_MINMAX__NEON_LD64_X32, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qu8_add_minmax_neon_params);
  }
}
435 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
436
437
438 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Calls the tester once with batch_size fixed at 16.
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
}
445
// Calls the tester for batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
454
// Calls the tester for every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
463
// Calls the tester for every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
472
// Calls the tester with inplace(true) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
482
// For batch sizes 1, 16, ..., 76, sweeps the value passed to a_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
494
// For batch sizes 1, 16, ..., 76, sweeps the value passed to b_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
506
// For batch sizes 1, 16, ..., 76, sweeps the value passed to y_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
518
// For batch sizes 1, 16, ..., 76, sweeps the value passed to a_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
530
// For batch sizes 1, 16, ..., 76, sweeps the value passed to b_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
542
// For batch sizes 1, 16, ..., 76, sweeps the value passed to y_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
    }
  }
}
554
// Calls the tester with qmin(128) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
564
// Calls the tester with qmax(128) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__NEON_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qu8_add_minmax_neon_params);
  }
}
574 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
575
576
577 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Calls the tester once with batch_size fixed at 8.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  VAddCMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
}
584
// Calls the tester for batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }
}
593
// Calls the tester for every batch size from 1 through 7.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }
}
602
// Calls the tester for every batch size from 9 through 15.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }
}
611
// Calls the tester with inplace(true) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }
}
621
// For batch sizes 1, 8, ..., 36, sweeps the value passed to a_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
633
// For batch sizes 1, 8, ..., 36, sweeps the value passed to b_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
645
// For batch sizes 1, 8, ..., 36, sweeps the value passed to y_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
657
// For batch sizes 1, 8, ..., 36, sweeps the value passed to a_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
669
// For batch sizes 1, 8, ..., 36, sweeps the value passed to b_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
681
// For batch sizes 1, 8, ..., 36, sweeps the value passed to y_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
693
// Calls the tester with qmin(128) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }
}
703
// Calls the tester with qmax(128) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qu8_add_minmax_sse2_params);
  }
}
713 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
714
715
716 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Calls the tester once with batch_size fixed at 16.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
}
723
// Calls the tester for batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
732
// Calls the tester for every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
741
// Calls the tester for every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
750
// Calls the tester with inplace(true) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
  }
}
760
// For batch sizes 1, 16, ..., 76, sweeps the value passed to a_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
772
// For batch sizes 1, 16, ..., 76, sweeps the value passed to b_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
784
// For batch sizes 1, 16, ..., 76, sweeps the value passed to y_zero_point()
// over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
796
// For batch sizes 1, 16, ..., 76, sweeps the value passed to a_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
808
// For batch sizes 1, 16, ..., 76, sweeps the value passed to b_scale():
// starts at 0.1f and multiplies by 3.14f while the value stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
820
// Sweeps y_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 16, 31, 46, 61, 76 on the SSE2 mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
832
// Runs the SSE2 mul16/ld64 x16 kernel with qmin set to 128 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
842
// Runs the SSE2 mul16/ld64 x16 kernel with qmax set to 128 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QU8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
852 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
853
854
855 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the SSE4.1 mul16/ld64 x8 kernel once with a batch size of exactly 8.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kBatch = 8;
  VAddCMicrokernelTester().batch_size(kBatch).Test(
      xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
      xnn_init_qu8_add_minmax_sse2_params);
}
862
// Runs the SSE4.1 mul16/ld64 x8 kernel on batch sizes that are multiples of 8:
// 16, 24, ..., 72.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester().batch_size(multiple * 8).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
871
// Runs the SSE4.1 mul16/ld64 x8 kernel on every batch size from 1 through 7.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 7; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
880
// Runs the SSE4.1 mul16/ld64 x8 kernel on every batch size from 9 through 15.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 9; n <= 15; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
889
// Runs the SSE4.1 mul16/ld64 x8 kernel with the tester's inplace option
// enabled for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
899
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36 on the SSE4.1 mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
911
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36 on the SSE4.1 mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
923
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36 on the SSE4.1 mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
935
// Sweeps a_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 8, 15, 22, 29, 36 on the SSE4.1 mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
947
// Sweeps b_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 8, 15, 22, 29, 36 on the SSE4.1 mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
959
// Sweeps y_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 8, 15, 22, 29, 36 on the SSE4.1 mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
971
// Runs the SSE4.1 mul16/ld64 x8 kernel with qmin set to 128 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
981
// Runs the SSE4.1 mul16/ld64 x8 kernel with qmax set to 128 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
991 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
992
993
994 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the SSE4.1 mul16/ld64 x16 kernel once with a batch size of exactly 16.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kBatch = 16;
  VAddCMicrokernelTester().batch_size(kBatch).Test(
      xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
      xnn_init_qu8_add_minmax_sse2_params);
}
1001
// Runs the SSE4.1 mul16/ld64 x16 kernel on batch sizes that are multiples of
// 16: 32, 48, ..., 144.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester().batch_size(multiple * 16).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1010
// Runs the SSE4.1 mul16/ld64 x16 kernel on every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 15; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1019
// Runs the SSE4.1 mul16/ld64 x16 kernel on every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 17; n <= 31; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1028
// Runs the SSE4.1 mul16/ld64 x16 kernel with the tester's inplace option
// enabled for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1038
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 16, 31, 46, 61, 76 on the SSE4.1 mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1050
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 16, 31, 46, 61, 76 on the SSE4.1 mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1062
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 16, 31, 46, 61, 76 on the SSE4.1 mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1074
// Sweeps a_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 16, 31, 46, 61, 76 on the SSE4.1 mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1086
// Sweeps b_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 16, 31, 46, 61, 76 on the SSE4.1 mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1098
// Sweeps y_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 16, 31, 46, 61, 76 on the SSE4.1 mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1110
// Runs the SSE4.1 mul16/ld64 x16 kernel with qmin set to 128 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1120
// Runs the SSE4.1 mul16/ld64 x16 kernel with qmax set to 128 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QU8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1130 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1131
1132
1133 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the AVX mul16/ld64 x8 kernel once with a batch size of exactly 8.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatch = 8;
  VAddCMicrokernelTester().batch_size(kBatch).Test(
      xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
      xnn_init_qu8_add_minmax_sse2_params);
}
1140
// Runs the AVX mul16/ld64 x8 kernel on batch sizes that are multiples of 8:
// 16, 24, ..., 72.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester().batch_size(multiple * 8).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1149
// Runs the AVX mul16/ld64 x8 kernel on every batch size from 1 through 7.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 7; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1158
// Runs the AVX mul16/ld64 x8 kernel on every batch size from 9 through 15.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n <= 15; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1167
// Runs the AVX mul16/ld64 x8 kernel with the tester's inplace option enabled
// for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1177
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36 on the AVX mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1189
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36 on the AVX mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1201
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36 on the AVX mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1213
// Sweeps a_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 8, 15, 22, 29, 36 on the AVX mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1225
// Sweeps b_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 8, 15, 22, 29, 36 on the AVX mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1237
// Sweeps y_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 8, 15, 22, 29, 36 on the AVX mul16/ld64 x8 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1249
// Runs the AVX mul16/ld64 x8 kernel with qmin set to 128 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1259
// Runs the AVX mul16/ld64 x8 kernel with qmax set to 128 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x8,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1269 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1270
1271
1272 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the AVX mul16/ld64 x16 kernel once with a batch size of exactly 16.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  constexpr size_t kBatch = 16;
  VAddCMicrokernelTester().batch_size(kBatch).Test(
      xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
      xnn_init_qu8_add_minmax_sse2_params);
}
1279
// Runs the AVX mul16/ld64 x16 kernel on batch sizes that are multiples of 16:
// 32, 48, ..., 144.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester().batch_size(multiple * 16).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1288
// Runs the AVX mul16/ld64 x16 kernel on every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 15; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1297
// Runs the AVX mul16/ld64 x16 kernel on every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n <= 31; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1306
// Runs the AVX mul16/ld64 x16 kernel with the tester's inplace option enabled
// for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1316
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 16, 31, 46, 61, 76 on the AVX mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1328
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 16, 31, 46, 61, 76 on the AVX mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1340
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 16, 31, 46, 61, 76 on the AVX mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1352
// Sweeps a_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 16, 31, 46, 61, 76 on the AVX mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1364
// Sweeps b_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 16, 31, 46, 61, 76 on the AVX mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1376
// Sweeps y_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 16, 31, 46, 61, 76 on the AVX mul16/ld64 x16 kernel.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
          xnn_init_qu8_add_minmax_sse2_params);
    }
  }
}
1388
// Runs the AVX mul16/ld64 x16 kernel with qmin set to 128 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1398
// Runs the AVX mul16/ld64 x16 kernel with qmax set to 128 for batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QU8_VADDC_MINMAX__AVX_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul16_ld64_x16,
        xnn_init_qu8_add_minmax_sse2_params);
  }
}
1408 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1409
1410
1411 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the SSE4.1 mul32/ld32 x8 kernel once with a batch size of exactly 8.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kBatch = 8;
  VAddCMicrokernelTester().batch_size(kBatch).Test(
      xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
      xnn_init_qu8_add_minmax_sse4_params);
}
1418
// Runs the SSE4.1 mul32/ld32 x8 kernel on batch sizes that are multiples of 8:
// 16, 24, ..., 72.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester().batch_size(multiple * 8).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
1427
// Runs the SSE4.1 mul32/ld32 x8 kernel on every batch size from 1 through 7.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 7; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
1436
// Runs the SSE4.1 mul32/ld32 x8 kernel on every batch size from 9 through 15.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 9; n <= 15; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
1445
// Runs the SSE4.1 mul32/ld32 x8 kernel with the tester's inplace option
// enabled for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
1455
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36 on the SSE4.1 mul32/ld32 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1467
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36 on the SSE4.1 mul32/ld32 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1479
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for batch sizes
// 1, 8, 15, 22, 29, 36 on the SSE4.1 mul32/ld32 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1491
// Sweeps a_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 8, 15, 22, 29, 36 on the SSE4.1 mul32/ld32 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1503
// Sweeps b_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 8, 15, 22, 29, 36 on the SSE4.1 mul32/ld32 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1515
// Sweeps y_scale geometrically (starting at 0.1, multiplied by 3.14 while
// <= 10) for batch sizes 1, 8, 15, 22, 29, 36 on the SSE4.1 mul32/ld32 x8 kernel.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
          xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1527
// Runs the SSE4.1 mul32/ld32 x8 kernel with qmin set to 128 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
1537
// Runs the SSE4.1 mul32/ld32 x8 kernel with qmax set to 128 for batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
1547 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1548
1549
1550 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the SSE4.1 mul32/ld32 x16 kernel once with a batch size of exactly 16.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  constexpr size_t kBatch = 16;
  VAddCMicrokernelTester().batch_size(kBatch).Test(
      xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
      xnn_init_qu8_add_minmax_sse4_params);
}
1557
// Runs the SSE4.1 mul32/ld32 x16 kernel on batch sizes that are multiples of
// 16: 32, 48, ..., 144.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester().batch_size(multiple * 16).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
1566
// Runs the SSE4.1 mul32/ld32 x16 kernel on every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 15; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
1575
// Runs the SSE4.1 mul32/ld32 x16 kernel on every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 17; n <= 31; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
1584
// Runs the SSE4.1 mul32/ld32 x16 kernel with the tester's inplace option
// enabled for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16,
        xnn_init_qu8_add_minmax_sse4_params);
  }
}
1594
// For batch sizes 1, 16, ..., 76, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1606
// For batch sizes 1, 16, ..., 76, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1618
// For batch sizes 1, 16, ..., 76, sweeps y_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1630
// For batch sizes 1, 16, ..., 76, sweeps a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1642
// For batch sizes 1, 16, ..., 76, sweeps b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1654
// For batch sizes 1, 16, ..., 76, sweeps y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1666
// Sweeps batch sizes 1, 16, ..., 76 (step 15) with qmin(128) set on the tester.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1676
// Sweeps batch sizes 1, 16, ..., 76 (step 15) with qmax(128) set on the tester.
TEST(QU8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1686 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1687
1688
1689 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 8 elements.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VAddCMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
}
1696
// Sweeps batch sizes 16, 24, ..., 72 (step 8).
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1705
// Sweeps every batch size from 1 through 7.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1714
// Sweeps every batch size from 9 through 15.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1723
// Sweeps batch sizes 1, 8, ..., 36 (step 7) with inplace(true) set on the tester.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1733
// For batch sizes 1, 8, ..., 36, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1745
// For batch sizes 1, 8, ..., 36, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1757
// For batch sizes 1, 8, ..., 36, sweeps y_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1769
// For batch sizes 1, 8, ..., 36, sweeps a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1781
// For batch sizes 1, 8, ..., 36, sweeps b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1793
// For batch sizes 1, 8, ..., 36, sweeps y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1805
// Sweeps batch sizes 1, 8, ..., 36 (step 7) with qmin(128) set on the tester.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1815
// Sweeps batch sizes 1, 8, ..., 36 (step 7) with qmax(128) set on the tester.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1825 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1826
1827
1828 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 16 elements.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
}
1835
// Sweeps batch sizes 32, 48, ..., 144 (step 16).
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1844
// Sweeps every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1853
// Sweeps every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1862
// Sweeps batch sizes 1, 16, ..., 76 (step 15) with inplace(true) set on the tester.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1872
// For batch sizes 1, 16, ..., 76, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1884
// For batch sizes 1, 16, ..., 76, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1896
// For batch sizes 1, 16, ..., 76, sweeps y_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1908
// For batch sizes 1, 16, ..., 76, sweeps a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1920
// For batch sizes 1, 16, ..., 76, sweeps b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1932
// For batch sizes 1, 16, ..., 76, sweeps y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
1944
// Sweeps batch sizes 1, 16, ..., 76 (step 15) with qmin(128) set on the tester.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1954
// Sweeps batch sizes 1, 16, ..., 76 (step 15) with qmax(128) set on the tester.
TEST(QU8_VADDC_MINMAX__AVX_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1964 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1965
1966
1967 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 8 elements.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_XOP;
  VAddCMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
}
1974
// Sweeps batch sizes 16, 24, ..., 72 (step 8).
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1983
// Sweeps every batch size from 1 through 7.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
1992
// Sweeps every batch size from 9 through 15.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2001
// Sweeps batch sizes 1, 8, ..., 36 (step 7) with inplace(true) set on the tester.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, inplace) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2011
// For batch sizes 1, 8, ..., 36, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2023
// For batch sizes 1, 8, ..., 36, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2035
// For batch sizes 1, 8, ..., 36, sweeps y_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2047
// For batch sizes 1, 8, ..., 36, sweeps a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2059
// For batch sizes 1, 8, ..., 36, sweeps b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2071
// For batch sizes 1, 8, ..., 36, sweeps y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2083
// Sweeps batch sizes 1, 8, ..., 36 (step 7) with qmin(128) set on the tester.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2093
// Sweeps batch sizes 1, 8, ..., 36 (step 7) with qmax(128) set on the tester.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2103 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2104
2105
2106 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 16 elements.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_XOP;
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
}
2113
// Sweeps batch sizes 32, 48, ..., 144 (step 16).
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2122
// Sweeps every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2131
// Sweeps every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2140
// Sweeps batch sizes 1, 16, ..., 76 (step 15) with inplace(true) set on the tester.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, inplace) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2150
// For batch sizes 1, 16, ..., 76, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2162
// For batch sizes 1, 16, ..., 76, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2174
// For batch sizes 1, 16, ..., 76, sweeps y_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2186
// For batch sizes 1, 16, ..., 76, sweeps a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2198
// For batch sizes 1, 16, ..., 76, sweeps b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2210
// For batch sizes 1, 16, ..., 76, sweeps y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
    }
  }
}
2222
// Sweeps batch sizes 1, 16, ..., 76 (step 15) with qmin(128) set on the tester.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2232
// Sweeps batch sizes 1, 16, ..., 76 (step 15) with qmax(128) set on the tester.
TEST(QU8_VADDC_MINMAX__XOP_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qu8_add_minmax_sse4_params);
  }
}
2242 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2243
2244
2245 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 8 elements.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  VAddCMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
}
2252
// Sweeps batch sizes 16, 24, ..., 72 (step 8).
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
  }
}
2261
// Sweeps every batch size from 1 through 7.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
  }
}
2270
// Sweeps every batch size from 9 through 15.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
  }
}
2279
// Sweeps batch sizes 1, 8, ..., 36 (step 7) with inplace(true) set on the tester.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
  }
}
2289
// For batch sizes 1, 8, ..., 36, sweeps a_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2301
// For batch sizes 1, 8, ..., 36, sweeps b_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2313
// For batch sizes 1, 8, ..., 36, sweeps y_zero_point over -128, -77, ..., 127 (step 51).
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2325
// For batch sizes 1, 8, ..., 36, sweeps a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2337
// For batch sizes 1, 8, ..., 36, sweeps b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2349
// For batch sizes 1, 8, ..., 36, sweeps y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qu8_add_minmax_avx2_params);
    }
  }
}
2361
// Runs the tester with qmin(128) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8,
              xnn_init_qu8_add_minmax_avx2_params);
    n += 7;
  }
}
2371
// Runs the tester with qmax(128) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8,
              xnn_init_qu8_add_minmax_avx2_params);
    n += 7;
  }
}
2381 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2382
2383
2384 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch_size = 16.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
      xnn_init_qu8_add_minmax_avx2_params);
}
2391
// Runs the tester for batch sizes 32, 48, ..., 144 (step 16, below 160).
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 32;
  while (n < 160) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
        xnn_init_qu8_add_minmax_avx2_params);
    n += 16;
  }
}
2400
// Runs the tester for every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n < 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
        xnn_init_qu8_add_minmax_avx2_params);
    ++n;
  }
}
2409
// Runs the tester for every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 17;
  while (n < 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
        xnn_init_qu8_add_minmax_avx2_params);
    ++n;
  }
}
2418
// Runs the tester with inplace(true) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
              xnn_init_qu8_add_minmax_avx2_params);
    n += 15;
  }
}
2428
// For batch sizes 1, 16, ..., 76, sweeps a_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
                xnn_init_qu8_add_minmax_avx2_params);
      zp += 51;
    }
    n += 15;
  }
}
2440
// For batch sizes 1, 16, ..., 76, sweeps b_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
                xnn_init_qu8_add_minmax_avx2_params);
      zp += 51;
    }
    n += 15;
  }
}
2452
// For batch sizes 1, 16, ..., 76, sweeps y_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
                xnn_init_qu8_add_minmax_avx2_params);
      zp += 51;
    }
    n += 15;
  }
}
2464
// For batch sizes 1, 16, ..., 76, sweeps a_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
                xnn_init_qu8_add_minmax_avx2_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
2476
// For batch sizes 1, 16, ..., 76, sweeps b_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
                xnn_init_qu8_add_minmax_avx2_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
2488
// For batch sizes 1, 16, ..., 76, sweeps y_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
                xnn_init_qu8_add_minmax_avx2_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
2500
// Runs the tester with qmin(128) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
              xnn_init_qu8_add_minmax_avx2_params);
    n += 15;
  }
}
2510
// Runs the tester with qmax(128) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX2;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16,
              xnn_init_qu8_add_minmax_avx2_params);
    n += 15;
  }
}
2520 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2521
2522
2523 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch_size = 16.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
      xnn_init_qu8_add_minmax_avx512_params);
}
2530
// Runs the tester for batch sizes 32, 48, ..., 144 (step 16, below 160).
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 32;
  while (n < 160) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qu8_add_minmax_avx512_params);
    n += 16;
  }
}
2539
// Runs the tester for every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n < 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qu8_add_minmax_avx512_params);
    ++n;
  }
}
2548
// Runs the tester for every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 17;
  while (n < 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
        xnn_init_qu8_add_minmax_avx512_params);
    ++n;
  }
}
2557
// Runs the tester with inplace(true) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
              xnn_init_qu8_add_minmax_avx512_params);
    n += 15;
  }
}
2567
// For batch sizes 1, 16, ..., 76, sweeps a_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
                xnn_init_qu8_add_minmax_avx512_params);
      zp += 51;
    }
    n += 15;
  }
}
2579
// For batch sizes 1, 16, ..., 76, sweeps b_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
                xnn_init_qu8_add_minmax_avx512_params);
      zp += 51;
    }
    n += 15;
  }
}
2591
// For batch sizes 1, 16, ..., 76, sweeps y_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
                xnn_init_qu8_add_minmax_avx512_params);
      zp += 51;
    }
    n += 15;
  }
}
2603
// For batch sizes 1, 16, ..., 76, sweeps a_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
                xnn_init_qu8_add_minmax_avx512_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
2615
// For batch sizes 1, 16, ..., 76, sweeps b_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
                xnn_init_qu8_add_minmax_avx512_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
2627
// For batch sizes 1, 16, ..., 76, sweeps y_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
                xnn_init_qu8_add_minmax_avx512_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
2639
// Runs the tester with qmin(128) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
              xnn_init_qu8_add_minmax_avx512_params);
    n += 15;
  }
}
2649
// Runs the tester with qmax(128) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16,
              xnn_init_qu8_add_minmax_avx512_params);
    n += 15;
  }
}
2659 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2660
2661
2662 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch_size = 32.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  VAddCMicrokernelTester().batch_size(32).Test(
      xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
      xnn_init_qu8_add_minmax_avx512_params);
}
2669
// Runs the tester for batch sizes 64, 96, ..., 288 (step 32, below 320).
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 64;
  while (n < 320) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qu8_add_minmax_avx512_params);
    n += 32;
  }
}
2678
// Runs the tester for every batch size from 1 through 31.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n < 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qu8_add_minmax_avx512_params);
    ++n;
  }
}
2687
// Runs the tester for every batch size from 33 through 63.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 33;
  while (n < 64) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
        xnn_init_qu8_add_minmax_avx512_params);
    ++n;
  }
}
2696
// Runs the tester with inplace(true) for batch sizes 1, 32, ..., 156.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
              xnn_init_qu8_add_minmax_avx512_params);
    n += 31;
  }
}
2706
// For batch sizes 1, 32, ..., 156, sweeps a_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 160) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
                xnn_init_qu8_add_minmax_avx512_params);
      zp += 51;
    }
    n += 31;
  }
}
2718
// For batch sizes 1, 32, ..., 156, sweeps b_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 160) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
                xnn_init_qu8_add_minmax_avx512_params);
      zp += 51;
    }
    n += 31;
  }
}
2730
// For batch sizes 1, 32, ..., 156, sweeps y_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 160) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
                xnn_init_qu8_add_minmax_avx512_params);
      zp += 51;
    }
    n += 31;
  }
}
2742
// For batch sizes 1, 32, ..., 156, sweeps a_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 160) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
                xnn_init_qu8_add_minmax_avx512_params);
      scale *= 3.14f;
    }
    n += 31;
  }
}
2754
// For batch sizes 1, 32, ..., 156, sweeps b_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 160) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
                xnn_init_qu8_add_minmax_avx512_params);
      scale *= 3.14f;
    }
    n += 31;
  }
}
2766
// For batch sizes 1, 32, ..., 156, sweeps y_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 160) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
                xnn_init_qu8_add_minmax_avx512_params);
      scale *= 3.14f;
    }
    n += 31;
  }
}
2778
// Runs the tester with qmin(128) for batch sizes 1, 32, ..., 156.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
              xnn_init_qu8_add_minmax_avx512_params);
    n += 31;
  }
}
2788
// Runs the tester with qmax(128) for batch sizes 1, 32, ..., 156.
TEST(QU8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32,
              xnn_init_qu8_add_minmax_avx512_params);
    n += 31;
  }
}
2798 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2799
2800
2801 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the tester with batch_size = 8.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, batch_eq_8) {
  VAddCMicrokernelTester().batch_size(8).Test(
      xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
      xnn_init_qu8_add_minmax_wasmsimd_params);
}
2807
// Runs the tester for batch sizes 16, 24, ..., 72 (step 8, below 80).
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, batch_div_8) {
  size_t n = 16;
  while (n < 80) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
        xnn_init_qu8_add_minmax_wasmsimd_params);
    n += 8;
  }
}
2815
// Runs the tester for every batch size from 1 through 7.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, batch_lt_8) {
  size_t n = 1;
  while (n < 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
        xnn_init_qu8_add_minmax_wasmsimd_params);
    ++n;
  }
}
2823
// Runs the tester for every batch size from 9 through 15.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, batch_gt_8) {
  size_t n = 9;
  while (n < 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
        xnn_init_qu8_add_minmax_wasmsimd_params);
    ++n;
  }
}
2831
// Runs the tester with inplace(true) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, inplace) {
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
              xnn_init_qu8_add_minmax_wasmsimd_params);
    n += 7;
  }
}
2840
// For batch sizes 1, 8, ..., 36, sweeps a_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, a_zero_point) {
  size_t n = 1;
  while (n <= 40) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
    n += 7;
  }
}
2851
// For batch sizes 1, 8, ..., 36, sweeps b_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, b_zero_point) {
  size_t n = 1;
  while (n <= 40) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
    n += 7;
  }
}
2862
// For batch sizes 1, 8, ..., 36, sweeps y_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, y_zero_point) {
  size_t n = 1;
  while (n <= 40) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
    n += 7;
  }
}
2873
// For batch sizes 1, 8, ..., 36, sweeps a_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, a_scale) {
  size_t n = 1;
  while (n <= 40) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      scale *= 3.14f;
    }
    n += 7;
  }
}
2884
// For batch sizes 1, 8, ..., 36, sweeps b_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, b_scale) {
  size_t n = 1;
  while (n <= 40) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      scale *= 3.14f;
    }
    n += 7;
  }
}
2895
// For batch sizes 1, 8, ..., 36, sweeps y_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, y_scale) {
  size_t n = 1;
  while (n <= 40) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      scale *= 3.14f;
    }
    n += 7;
  }
}
2906
// Runs the tester with qmin(128) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, qmin) {
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
              xnn_init_qu8_add_minmax_wasmsimd_params);
    n += 7;
  }
}
2915
// Runs the tester with qmax(128) for batch sizes 1, 8, ..., 36.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X8, qmax) {
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x8,
              xnn_init_qu8_add_minmax_wasmsimd_params);
    n += 7;
  }
}
2924 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2925
2926
2927 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the tester with batch_size = 16.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, batch_eq_16) {
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
      xnn_init_qu8_add_minmax_wasmsimd_params);
}
2933
// Runs the tester for batch sizes 32, 48, ..., 144 (step 16, below 160).
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, batch_div_16) {
  size_t n = 32;
  while (n < 160) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
        xnn_init_qu8_add_minmax_wasmsimd_params);
    n += 16;
  }
}
2941
// Runs the tester for every batch size from 1 through 15.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, batch_lt_16) {
  size_t n = 1;
  while (n < 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
        xnn_init_qu8_add_minmax_wasmsimd_params);
    ++n;
  }
}
2949
// Runs the tester for every batch size from 17 through 31.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, batch_gt_16) {
  size_t n = 17;
  while (n < 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
        xnn_init_qu8_add_minmax_wasmsimd_params);
    ++n;
  }
}
2957
// Runs the tester with inplace(true) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, inplace) {
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
              xnn_init_qu8_add_minmax_wasmsimd_params);
    n += 15;
  }
}
2966
// For batch sizes 1, 16, ..., 76, sweeps a_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, a_zero_point) {
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
    n += 15;
  }
}
2977
// For batch sizes 1, 16, ..., 76, sweeps b_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, b_zero_point) {
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
    n += 15;
  }
}
2988
// For batch sizes 1, 16, ..., 76, sweeps y_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, y_zero_point) {
  size_t n = 1;
  while (n <= 80) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
    n += 15;
  }
}
2999
// For batch sizes 1, 16, ..., 76, sweeps a_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, a_scale) {
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
3010
// For batch sizes 1, 16, ..., 76, sweeps b_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, b_scale) {
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
3021
// For batch sizes 1, 16, ..., 76, sweeps y_scale geometrically: it starts at
// 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, y_scale) {
  size_t n = 1;
  while (n <= 80) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_scale(scale)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      scale *= 3.14f;
    }
    n += 15;
  }
}
3032
// Runs the tester with qmin(128) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, qmin) {
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmin(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
              xnn_init_qu8_add_minmax_wasmsimd_params);
    n += 15;
  }
}
3041
// Runs the tester with qmax(128) for batch sizes 1, 16, ..., 76.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X16, qmax) {
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .qmax(128)
        .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16,
              xnn_init_qu8_add_minmax_wasmsimd_params);
    n += 15;
  }
}
3050 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3051
3052
3053 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the tester with batch_size = 32.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, batch_eq_32) {
  VAddCMicrokernelTester().batch_size(32).Test(
      xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
      xnn_init_qu8_add_minmax_wasmsimd_params);
}
3059
// Runs the tester for batch sizes 64, 96, ..., 288 (step 32, below 320).
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, batch_div_32) {
  size_t n = 64;
  while (n < 320) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
        xnn_init_qu8_add_minmax_wasmsimd_params);
    n += 32;
  }
}
3067
// Runs the tester for every batch size from 1 through 31.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, batch_lt_32) {
  size_t n = 1;
  while (n < 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
        xnn_init_qu8_add_minmax_wasmsimd_params);
    ++n;
  }
}
3075
// Runs the tester for every batch size from 33 through 63.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, batch_gt_32) {
  size_t n = 33;
  while (n < 64) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
        xnn_init_qu8_add_minmax_wasmsimd_params);
    ++n;
  }
}
3083
// Runs the tester with inplace(true) for batch sizes 1, 32, ..., 156.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, inplace) {
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
              xnn_init_qu8_add_minmax_wasmsimd_params);
    n += 31;
  }
}
3092
// For batch sizes 1, 32, ..., 156, sweeps a_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, a_zero_point) {
  size_t n = 1;
  while (n <= 160) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
    n += 31;
  }
}
3103
// For batch sizes 1, 32, ..., 156, sweeps b_zero_point from -128 to 127 in
// steps of 51.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, b_zero_point) {
  size_t n = 1;
  while (n <= 160) {
    int32_t zp = -128;
    while (zp <= 127) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zp)
          .Test(xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
                xnn_init_qu8_add_minmax_wasmsimd_params);
      zp += 51;
    }
    n += 31;
  }
}
3114
// Sweeps the tester's y_zero_point setting (-128 to 127 in steps of 51) for
// each batch size 1, 32, 63, ... (step 31) up to and including 160.
// NOTE(review): negative zero points are passed for a QU8 kernel; presumably
// the tester's setter narrows them to an unsigned 8-bit value - confirm.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, y_zero_point) {
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
          xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }
}
3125
// Sweeps the tester's a_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for each batch size 1, 32, 63, ...
// (step 31) up to and including 160.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, a_scale) {
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
          xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }
}
3136
// Sweeps the tester's b_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for each batch size 1, 32, 63, ...
// (step 31) up to and including 160.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, b_scale) {
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
          xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }
}
3147
// Sweeps the tester's y_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for each batch size 1, 32, 63, ...
// (step 31) up to and including 160.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, y_scale) {
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
          xnn_init_qu8_add_minmax_wasmsimd_params);
    }
  }
}
3158
// Runs the wasmsimd_x32 kernel with the tester's qmin set to 128, for batch
// sizes 1, 32, 63, ... (step 31) up to and including 160.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, qmin) {
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
        xnn_init_qu8_add_minmax_wasmsimd_params);
  }
}
3167
// Runs the wasmsimd_x32 kernel with the tester's qmax set to 128, for batch
// sizes 1, 32, 63, ... (step 31) up to and including 160.
TEST(QU8_VADDC_MINMAX__WASMSIMD_X32, qmax) {
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x32,
        xnn_init_qu8_add_minmax_wasmsimd_params);
  }
}
3176 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3177
3178
// Runs the scalar_x1 kernel a single time with batch_size set to 1.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, batch_eq_1) {
  VAddCMicrokernelTester().batch_size(1).Test(
      xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
      xnn_init_qu8_add_minmax_scalar_params);
}
3184
// Runs the scalar_x1 kernel once for every batch size from 2 through 9.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, batch_gt_1) {
  for (size_t n = 2; n < 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3192
// Runs the scalar_x1 kernel with the tester's inplace option enabled, for
// every batch size from 1 through 5.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, inplace) {
  for (size_t n = 1; n <= 5; ++n) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3201
// Sweeps the tester's a_zero_point setting (-128 to 127 in steps of 51) for
// every batch size from 1 through 5.
// NOTE(review): negative zero points are passed for a QU8 kernel; presumably
// the tester's setter narrows them to an unsigned 8-bit value - confirm.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, a_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3212
// Sweeps the tester's b_zero_point setting (-128 to 127 in steps of 51) for
// every batch size from 1 through 5.
// NOTE(review): negative zero points are passed for a QU8 kernel; presumably
// the tester's setter narrows them to an unsigned 8-bit value - confirm.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, b_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3223
// Sweeps the tester's y_zero_point setting (-128 to 127 in steps of 51) for
// every batch size from 1 through 5.
// NOTE(review): negative zero points are passed for a QU8 kernel; presumably
// the tester's setter narrows them to an unsigned 8-bit value - confirm.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, y_zero_point) {
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3234
// Sweeps the tester's a_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for every batch size from 1 to 5.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, a_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3245
// Sweeps the tester's b_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for every batch size from 1 to 5.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, b_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3256
// Sweeps the tester's y_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for every batch size from 1 to 5.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, y_scale) {
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3267
// Runs the scalar_x1 kernel with the tester's qmin set to 128, for every
// batch size from 1 through 5.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, qmin) {
  for (size_t n = 1; n <= 5; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3276
// Runs the scalar_x1 kernel with the tester's qmax set to 128, for every
// batch size from 1 through 5.
TEST(QU8_VADDC_MINMAX__SCALAR_X1, qmax) {
  for (size_t n = 1; n <= 5; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x1,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3285
// Runs the scalar_x2 kernel a single time with batch_size set to 2.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, batch_eq_2) {
  VAddCMicrokernelTester().batch_size(2).Test(
      xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
      xnn_init_qu8_add_minmax_scalar_params);
}
3291
// Runs the scalar_x2 kernel for the even batch sizes 4, 6, ..., 18.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, batch_div_2) {
  for (size_t n = 4; n < 20; n += 2) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3299
// Runs the scalar_x2 kernel for batch sizes below 2; with these bounds the
// loop body executes only for batch size 1.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, batch_lt_2) {
  for (size_t n = 1; n < 2; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3307
// Runs the scalar_x2 kernel for batch sizes in [3, 4); with these bounds the
// loop body executes only for batch size 3.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, batch_gt_2) {
  for (size_t n = 3; n < 4; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3315
// Runs the scalar_x2 kernel with the tester's inplace option enabled, for
// every batch size from 1 through 10.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, inplace) {
  for (size_t n = 1; n <= 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3324
// Sweeps the tester's a_zero_point setting (-128 to 127 in steps of 51) for
// every batch size from 1 through 10.
// NOTE(review): negative zero points are passed for a QU8 kernel; presumably
// the tester's setter narrows them to an unsigned 8-bit value - confirm.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, a_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3335
// Sweeps the tester's b_zero_point setting (-128 to 127 in steps of 51) for
// every batch size from 1 through 10.
// NOTE(review): negative zero points are passed for a QU8 kernel; presumably
// the tester's setter narrows them to an unsigned 8-bit value - confirm.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, b_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3346
// Sweeps the tester's y_zero_point setting (-128 to 127 in steps of 51) for
// every batch size from 1 through 10.
// NOTE(review): negative zero points are passed for a QU8 kernel; presumably
// the tester's setter narrows them to an unsigned 8-bit value - confirm.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, y_zero_point) {
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3357
// Sweeps the tester's a_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for every batch size from 1 to 10.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, a_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3368
// Sweeps the tester's b_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for every batch size from 1 to 10.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, b_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3379
// Sweeps the tester's y_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for every batch size from 1 to 10.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, y_scale) {
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3390
// Runs the scalar_x2 kernel with the tester's qmin set to 128, for every
// batch size from 1 through 10.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, qmin) {
  for (size_t n = 1; n <= 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3399
// Runs the scalar_x2 kernel with the tester's qmax set to 128, for every
// batch size from 1 through 10.
TEST(QU8_VADDC_MINMAX__SCALAR_X2, qmax) {
  for (size_t n = 1; n <= 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x2,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3408
// Runs the scalar_x4 kernel a single time with batch_size set to 4.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, batch_eq_4) {
  VAddCMicrokernelTester().batch_size(4).Test(
      xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
      xnn_init_qu8_add_minmax_scalar_params);
}
3414
// Runs the scalar_x4 kernel for the batch sizes 8, 12, ..., 36 (multiples of
// 4 below 40).
TEST(QU8_VADDC_MINMAX__SCALAR_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3422
// Runs the scalar_x4 kernel once for each batch size 1, 2 and 3.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3430
// Runs the scalar_x4 kernel once for each batch size 5, 6 and 7.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3438
// Runs the scalar_x4 kernel with the tester's inplace option enabled, for
// batch sizes 1, 4, 7, ... (step 3) up to and including 20.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3447
// Sweeps the tester's a_zero_point setting (-128 to 127 in steps of 51) for
// batch sizes 1, 4, 7, ... (step 3) up to and including 20.
// NOTE(review): negative zero points are passed for a QU8 kernel; presumably
// the tester's setter narrows them to an unsigned 8-bit value - confirm.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, a_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3458
// Sweeps the tester's b_zero_point setting (-128 to 127 in steps of 51) for
// batch sizes 1, 4, 7, ... (step 3) up to and including 20.
// NOTE(review): negative zero points are passed for a QU8 kernel; presumably
// the tester's setter narrows them to an unsigned 8-bit value - confirm.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, b_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3469
// Sweeps the tester's y_zero_point setting (-128 to 127 in steps of 51) for
// batch sizes 1, 4, 7, ... (step 3) up to and including 20.
// NOTE(review): negative zero points are passed for a QU8 kernel; presumably
// the tester's setter narrows them to an unsigned 8-bit value - confirm.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, y_zero_point) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3480
// Sweeps the tester's a_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for batch sizes 1, 4, 7, ...
// (step 3) up to and including 20.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, a_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3491
// Sweeps the tester's b_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for batch sizes 1, 4, 7, ...
// (step 3) up to and including 20.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, b_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3502
// Sweeps the tester's y_scale setting: it starts at 0.1f and is multiplied by
// 3.14f while it stays <= 10.0f. Repeated for batch sizes 1, 4, 7, ...
// (step 3) up to and including 20.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, y_scale) {
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
          xnn_init_qu8_add_minmax_scalar_params);
    }
  }
}
3513
// Runs the scalar_x4 kernel with the tester's qmin set to 128, for batch
// sizes 1, 4, 7, ... (step 3) up to and including 20.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, qmin) {
  for (size_t n = 1; n <= 20; n += 3) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}
3522
// Runs the scalar_x4 kernel with the tester's qmax set to 128, for batch
// sizes 1, 4, 7, ... (step 3) up to and including 20.
TEST(QU8_VADDC_MINMAX__SCALAR_X4, qmax) {
  for (size_t n = 1; n <= 20; n += 3) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qu8_vaddc_minmax_ukernel__scalar_x4,
        xnn_init_qu8_add_minmax_scalar_params);
  }
}