• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/qs8-vaddc-minmax.yaml
8 //   Generator: tools/generate-vbinary-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/microparams-init.h>
17 #include <xnnpack/vadd.h>
18 #include "vaddc-microkernel-tester.h"
19 
20 
21 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the ld64_x8 kernel with batch_size set to 8.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  VAddCMicrokernelTester().batch_size(8).Test(
      xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
}
28 
// Batch sizes that are multiples of 8: 16, 24, ... while below 80.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
37 
// Every batch size from 1 up to, but not including, 8.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
46 
// Every batch size from 9 up to, but not including, 16.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
55 
// Batch sizes 1..40 in steps of 7, with the tester's inplace flag enabled.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
65 
// For batch sizes 1..40 (step 7), sweeps a_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
77 
// For batch sizes 1..40 (step 7), sweeps b_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
89 
// For batch sizes 1..40 (step 7), sweeps y_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
101 
// For batch sizes 1..40 (step 7), a_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
113 
// For batch sizes 1..40 (step 7), b_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
125 
// For batch sizes 1..40 (step 7), y_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
137 
// Batch sizes 1..40 in steps of 7, with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
147 
// Batch sizes 1..40 in steps of 7, with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X8, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8, xnn_init_qs8_add_minmax_neon_params);
  }
}
157 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
158 
159 
160 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the ld64_x16 kernel with batch_size set to 16.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
}
167 
// Batch sizes that are multiples of 16: 32, 48, ... while below 160.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
176 
// Every batch size from 1 up to, but not including, 16.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
185 
// Every batch size from 17 up to, but not including, 32.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
194 
// Batch sizes 1..80 in steps of 15, with the tester's inplace flag enabled.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
204 
// For batch sizes 1..80 (step 15), sweeps a_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
216 
// For batch sizes 1..80 (step 15), sweeps b_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
228 
// For batch sizes 1..80 (step 15), sweeps y_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
240 
// For batch sizes 1..80 (step 15), a_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
252 
// For batch sizes 1..80 (step 15), b_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
264 
// For batch sizes 1..80 (step 15), y_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
276 
// Batch sizes 1..80 in steps of 15, with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
286 
// Batch sizes 1..80 in steps of 15, with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
296 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
297 
298 
299 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the ld64_x24 kernel with batch_size set to 24.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  VAddCMicrokernelTester().batch_size(24).Test(
      xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
}
306 
// Batch sizes that are multiples of 24: 48, 72, ... while below 240.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 48; n < 240; n += 24) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
315 
// Every batch size from 1 up to, but not including, 24.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 24; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
324 
// Every batch size from 25 up to, but not including, 48.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 25; n < 48; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
333 
// Batch sizes 1..120 in steps of 23, with the tester's inplace flag enabled.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
343 
// For batch sizes 1..120 (step 23), sweeps a_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
355 
// For batch sizes 1..120 (step 23), sweeps b_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
367 
// For batch sizes 1..120 (step 23), sweeps y_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
379 
// For batch sizes 1..120 (step 23), a_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
391 
// For batch sizes 1..120 (step 23), b_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
403 
// For batch sizes 1..120 (step 23), y_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
415 
// Batch sizes 1..120 in steps of 23, with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
425 
// Batch sizes 1..120 in steps of 23, with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X24, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x24, xnn_init_qs8_add_minmax_neon_params);
  }
}
435 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
436 
437 
438 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the ld64_x32 kernel with batch_size set to 32.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  VAddCMicrokernelTester().batch_size(32).Test(
      xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
}
445 
// Batch sizes that are multiples of 32: 64, 96, ... while below 320.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
454 
// Every batch size from 1 up to, but not including, 32.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
463 
// Every batch size from 33 up to, but not including, 64.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
472 
// Batch sizes 1..160 in steps of 31, with the tester's inplace flag enabled.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
482 
// For batch sizes 1..160 (step 31), sweeps a_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
494 
// For batch sizes 1..160 (step 31), sweeps b_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
506 
// For batch sizes 1..160 (step 31), sweeps y_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
518 
// For batch sizes 1..160 (step 31), a_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
530 
// For batch sizes 1..160 (step 31), b_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
542 
// For batch sizes 1..160 (step 31), y_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
554 
// Batch sizes 1..160 in steps of 31, with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
564 
// Batch sizes 1..160 in steps of 31, with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__NEON_LD64_X32, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
574 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
575 
576 
577 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the ld128_x16 kernel with batch_size set to 16.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VAddCMicrokernelTester().batch_size(16).Test(
      xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
}
584 
// Batch sizes that are multiples of 16: 32, 48, ... while below 160.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
593 
// Every batch size from 1 up to, but not including, 16.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
602 
// Every batch size from 17 up to, but not including, 32.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
611 
// Batch sizes 1..80 in steps of 15, with the tester's inplace flag enabled.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
621 
// For batch sizes 1..80 (step 15), sweeps a_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
633 
// For batch sizes 1..80 (step 15), sweeps b_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
645 
// For batch sizes 1..80 (step 15), sweeps y_zero_point from -128 to 127 in steps of 51.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
657 
// For batch sizes 1..80 (step 15), a_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
669 
// For batch sizes 1..80 (step 15), b_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
681 
// For batch sizes 1..80 (step 15), y_scale starts at 0.1f and is multiplied by
// 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
693 
// Batch sizes 1..80 in steps of 15, with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
703 
// Batch sizes 1..80 in steps of 15, with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X16, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x16, xnn_init_qs8_add_minmax_neon_params);
  }
}
713 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
714 
715 
716 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the ld128_x32 kernel with batch_size set to 32.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, batch_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  VAddCMicrokernelTester().batch_size(32).Test(
      xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
}
723 
// Batch sizes that are multiples of 32: 64, 96, ... while below 320.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, batch_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
732 
// Every batch size from 1 up to, but not including, 32.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, batch_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
        xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
741 
// Runs the tester for every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, batch_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
750 
// Runs the tester with inplace(true) for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
760 
// For batch sizes 1, 32, ..., 156, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
772 
// For batch sizes 1, 32, ..., 156, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
784 
// For batch sizes 1, 32, ..., 156, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
796 
// For batch sizes 1, 32, ..., 156, sweeps a_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, a_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
808 
// For batch sizes 1, 32, ..., 156, sweeps b_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, b_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
820 
// For batch sizes 1, 32, ..., 156, sweeps y_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
    }
  }
}
832 
// Runs the tester with qmin(128) for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
842 
// Runs the tester with qmax(128) for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__NEON_LD128_X32, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__neon_ld128_x32, xnn_init_qs8_add_minmax_neon_params);
  }
}
852 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
853 
854 
855 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch_size(8).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  VAddCMicrokernelTester tester;
  tester.batch_size(8)
    .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
}
862 
// Runs the tester for batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
871 
// Runs the tester for every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
880 
// Runs the tester for every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
889 
// Runs the tester with inplace(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
899 
// For batch sizes 1, 8, ..., 36, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
911 
// For batch sizes 1, 8, ..., 36, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
923 
// For batch sizes 1, 8, ..., 36, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
935 
// For batch sizes 1, 8, ..., 36, sweeps a_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
947 
// For batch sizes 1, 8, ..., 36, sweeps b_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
959 
// For batch sizes 1, 8, ..., 36, sweeps y_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
971 
// Runs the tester with qmin(128) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
981 
// Runs the tester with qmax(128) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse2_params);
  }
}
991 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
992 
993 
994 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch_size(16).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  VAddCMicrokernelTester tester;
  tester.batch_size(16)
    .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
}
1001 
// Runs the tester for batch sizes 32, 48, ..., 144 (multiples of 16 below 160).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1010 
// Runs the tester for every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1019 
// Runs the tester for every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1028 
// Runs the tester with inplace(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1038 
// For batch sizes 1, 16, ..., 76, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1050 
// For batch sizes 1, 16, ..., 76, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1062 
// For batch sizes 1, 16, ..., 76, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1074 
// For batch sizes 1, 16, ..., 76, sweeps a_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1086 
// For batch sizes 1, 16, ..., 76, sweeps b_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1098 
// For batch sizes 1, 16, ..., 76, sweeps y_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1110 
// Runs the tester with qmin(128) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1120 
// Runs the tester with qmax(128) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1130 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1131 
1132 
1133 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch_size(24).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  VAddCMicrokernelTester tester;
  tester.batch_size(24)
    .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
}
1140 
// Runs the tester for batch sizes 48, 72, ..., 216 (multiples of 24 below 240).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 48; n < 240; n += 24) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1149 
// Runs the tester for every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 24; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1158 
// Runs the tester for every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 25; n < 48; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1167 
// Runs the tester with inplace(true) for batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1177 
// For batch sizes 1, 24, ..., 116, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1189 
// For batch sizes 1, 24, ..., 116, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1201 
// For batch sizes 1, 24, ..., 116, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1213 
// For batch sizes 1, 24, ..., 116, sweeps a_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1225 
// For batch sizes 1, 24, ..., 116, sweeps b_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1237 
// For batch sizes 1, 24, ..., 116, sweeps y_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1249 
// Runs the tester with qmin(128) for batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1259 
// Runs the tester with qmax(128) for batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X24, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1269 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1270 
1271 
1272 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch_size(32).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSE2;
  VAddCMicrokernelTester tester;
  tester.batch_size(32)
    .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
}
1279 
// Runs the tester for batch sizes 64, 96, ..., 288 (multiples of 32 below 320).
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1288 
// Runs the tester for every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1297 
// Runs the tester for every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1306 
// Runs the tester with inplace(true) for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1316 
// For batch sizes 1, 32, ..., 156, sweeps a_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1328 
// For batch sizes 1, 32, ..., 156, sweeps b_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1340 
// For batch sizes 1, 32, ..., 156, sweeps y_zero_point over -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zp = -128; zp <= 127; zp += 51) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_zero_point(zp)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1352 
// For batch sizes 1, 32, ..., 156, sweeps a_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1364 
// For batch sizes 1, 32, ..., 156, sweeps b_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1376 
// For batch sizes 1, 32, ..., 156, sweeps y_scale geometrically: starts at 0.1f,
// multiplies by 3.14f each step, and stops once the value exceeds 10.0f.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester tester;
      tester.batch_size(n).y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
    }
  }
}
1388 
// Runs the tester with qmin(128) for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1398 
// Runs the tester with qmax(128) for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__SSE2_MUL16_LD64_X32, qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n).qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse2_params);
  }
}
1408 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1409 
1410 
1411 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the tester with batch_size(8).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  VAddCMicrokernelTester tester;
  tester.batch_size(8)
    .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
}
1418 
// Runs the tester for batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1427 
// Runs the tester for every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester tester;
    tester.batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }
}
1436 
// Sweeps every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1445 
// Runs with inplace(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
1455 
// Crosses batch sizes 1, 8, ..., 36 with a_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1467 
// Crosses batch sizes 1, 8, ..., 36 with b_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1479 
// Crosses batch sizes 1, 8, ..., 36 with y_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1491 
// Crosses batch sizes 1, 8, ..., 36 with a_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
1503 
// Crosses batch sizes 1, 8, ..., 36 with b_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
1515 
// Crosses batch sizes 1, 8, ..., 36 with y_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
1527 
// Sets qmin(128) on the tester for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
1537 
// Sets qmax(128) on the tester for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
1547 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1548 
1549 
1550 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch size of exactly 16.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  VAddCMicrokernelTester().batch_size(16).Test(ukernel, init_params);
}
1557 
// Sweeps batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1566 
// Sweeps every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1575 
// Sweeps every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1584 
// Runs with inplace(true) for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
1594 
// Crosses batch sizes 1, 16, ..., 76 with a_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1606 
// Crosses batch sizes 1, 16, ..., 76 with b_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1618 
// Crosses batch sizes 1, 16, ..., 76 with y_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1630 
// Crosses batch sizes 1, 16, ..., 76 with a_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
1642 
// Crosses batch sizes 1, 16, ..., 76 with b_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
1654 
// Crosses batch sizes 1, 16, ..., 76 with y_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
1666 
// Sets qmin(128) on the tester for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
1676 
// Sets qmax(128) on the tester for batch sizes 1, 16, 31, 46, 61, 76.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x16;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
1686 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1687 
1688 
1689 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch size of exactly 24.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  VAddCMicrokernelTester().batch_size(24).Test(ukernel, init_params);
}
1696 
// Sweeps batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 48; n < 240; n += 24) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1705 
// Sweeps every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n < 24; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1714 
// Sweeps every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 25; n < 48; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1723 
// Runs with inplace(true) for batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
1733 
// Crosses batch sizes 1, 24, ..., 116 with a_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1745 
// Crosses batch sizes 1, 24, ..., 116 with b_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1757 
// Crosses batch sizes 1, 24, ..., 116 with y_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1769 
// Crosses batch sizes 1, 24, ..., 116 with a_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
1781 
// Crosses batch sizes 1, 24, ..., 116 with b_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
1793 
// Crosses batch sizes 1, 24, ..., 116 with y_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
1805 
// Sets qmin(128) on the tester for batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, qmin) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
1815 
// Sets qmax(128) on the tester for batch sizes 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X24, qmax) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x24;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
1825 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1826 
1827 
1828 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch size of exactly 32.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  VAddCMicrokernelTester().batch_size(32).Test(ukernel, init_params);
}
1835 
// Sweeps batch sizes 64, 96, ..., 288 (multiples of 32).
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1844 
// Sweeps every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1853 
// Sweeps every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1862 
// Runs with inplace(true) for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
1872 
// Crosses batch sizes 1, 32, ..., 156 with a_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1884 
// Crosses batch sizes 1, 32, ..., 156 with b_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1896 
// Crosses batch sizes 1, 32, ..., 156 with y_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
1908 
// Crosses batch sizes 1, 32, ..., 156 with a_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
1920 
// Crosses batch sizes 1, 32, ..., 156 with b_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
1932 
// Crosses batch sizes 1, 32, ..., 156 with y_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
1944 
// Sets qmin(128) on the tester for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, qmin) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
1954 
// Sets qmax(128) on the tester for batch sizes 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__SSE41_MUL16_LD64_X32, qmax) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x32;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
1964 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1965 
1966 
1967 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch size of exactly 8.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  VAddCMicrokernelTester().batch_size(8).Test(ukernel, init_params);
}
1974 
// Sweeps batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1983 
// Sweeps every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
1992 
// Sweeps every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
2001 
// Runs with inplace(true) for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
2011 
// Crosses batch sizes 1, 8, ..., 36 with a_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .a_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
2023 
// Crosses batch sizes 1, 8, ..., 36 with b_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .b_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
2035 
// Crosses batch sizes 1, 8, ..., 36 with y_zero_point values -128, -77, -26, 25, 76, 127.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
          .batch_size(n)
          .y_zero_point(zero_point)
          .Test(ukernel, init_params);
    }
  }
}
2047 
// Crosses batch sizes 1, 8, ..., 36 with a_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
2059 
// Crosses batch sizes 1, 8, ..., 36 with b_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
2071 
// Crosses batch sizes 1, 8, ..., 36 with y_scale starting at 0.1f and
// multiplied by 3.14f per step while it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
2083 
// Sets qmin(128) on the tester for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
2093 
// Sets qmax(128) on the tester for batch sizes 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x8;
  const auto init_params = xnn_init_qs8_add_minmax_sse4_mul16_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
2103 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2104 
2105 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, with
  // parameters initialized by xnn_init_qs8_add_minmax_sse4_mul16_params.
  // Compiled only for x86 / x86-64 targets. Every test starts with
  // TEST_REQUIRES_X86_AVX (presumably skips when AVX is unavailable - see
  // isa-checks.h).

  // Single run with a batch of exactly 16 elements.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX;
    VAddCMicrokernelTester()
      .batch_size(16)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Every batch size below 16: 1 through 15.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Every batch size strictly between 16 and 32: 17 through 31.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Batch sizes 1, 16, 31, 46, 61, 76 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, a_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, b_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 16, 31, 46, 61, 76.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, y_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, a_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, b_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, y_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmin set to 128.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, qmin) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmax set to 128.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X16, qmax) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x16, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2243 
2244 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, with
  // parameters initialized by xnn_init_qs8_add_minmax_sse4_mul16_params.
  // Compiled only for x86 / x86-64 targets. Every test starts with
  // TEST_REQUIRES_X86_AVX (presumably skips when AVX is unavailable - see
  // isa-checks.h).

  // Single run with a batch of exactly 24 elements.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, batch_eq_24) {
    TEST_REQUIRES_X86_AVX;
    VAddCMicrokernelTester()
      .batch_size(24)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }

  // Batch sizes that are multiples of 24: 48, 72, ..., 216.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, batch_div_24) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Every batch size below 24: 1 through 23.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, batch_lt_24) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 24; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Every batch size strictly between 24 and 48: 25 through 47.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, batch_gt_24) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 25; batch_size < 48; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Batch sizes 1, 24, 47, 70, 93, 116 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 24, 47, 70, 93, 116.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, a_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 24, 47, 70, 93, 116.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, b_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 24, 47, 70, 93, 116.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, y_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, a_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, b_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, y_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Batch sizes 1, 24, 47, 70, 93, 116 with the tester's qmin set to 128.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, qmin) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Batch sizes 1, 24, 47, 70, 93, 116 with the tester's qmax set to 128.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X24, qmax) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x24, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2382 
2383 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, with
  // parameters initialized by xnn_init_qs8_add_minmax_sse4_mul16_params.
  // Compiled only for x86 / x86-64 targets. Every test starts with
  // TEST_REQUIRES_X86_AVX (presumably skips when AVX is unavailable - see
  // isa-checks.h).

  // Single run with a batch of exactly 32 elements.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX;
    VAddCMicrokernelTester()
      .batch_size(32)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
  }

  // Batch sizes that are multiples of 32: 64, 96, ..., 288.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Every batch size below 32: 1 through 31.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size < 32; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Every batch size strictly between 32 and 64: 33 through 63.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 33; batch_size < 64; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Batch sizes 1, 32, 63, 94, 125, 156 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 32, 63, 94, 125, 156.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, a_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 32, 63, 94, 125, 156.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, b_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 32, 63, 94, 125, 156.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, y_zero_point) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, a_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, b_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Sweeps y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, y_scale) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
      }
    }
  }

  // Batch sizes 1, 32, 63, 94, 125, 156 with the tester's qmin set to 128.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, qmin) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }

  // Batch sizes 1, 32, 63, 94, 125, 156 with the tester's qmax set to 128.
  TEST(QS8_VADDC_MINMAX__AVX_MUL16_LD64_X32, qmax) {
    TEST_REQUIRES_X86_AVX;
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul16_ld64_x32, xnn_init_qs8_add_minmax_sse4_mul16_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2521 
2522 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, with
  // parameters initialized by xnn_init_qs8_add_minmax_sse4_mul32_params.
  // Compiled only for x86 / x86-64 targets. Every test starts with
  // TEST_REQUIRES_X86_SSE41 (presumably skips when SSE4.1 is unavailable - see
  // isa-checks.h).

  // Single run with a batch of exactly 8 elements.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_eq_8) {
    TEST_REQUIRES_X86_SSE41;
    VAddCMicrokernelTester()
      .batch_size(8)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., 72.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_div_8) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }

  // Every batch size below 8: 1 through 7.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_lt_8) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }

  // Every batch size strictly between 8 and 16: 9 through 15.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, batch_gt_8) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }

  // Batch sizes 1, 8, 15, 22, 29, 36 with inplace(true) set on the tester.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, inplace) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }

  // Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, a_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_zero_point(a_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      }
    }
  }

  // Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, b_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_zero_point(b_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      }
    }
  }

  // Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each of the batch
  // sizes 1, 8, 15, 22, 29, 36.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, y_zero_point) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_zero_point(y_zero_point)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      }
    }
  }

  // Sweeps a_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, a_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .a_scale(a_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      }
    }
  }

  // Sweeps b_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, b_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .b_scale(b_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      }
    }
  }

  // Sweeps y_scale from 0.1, multiplying by 3.14 while it stays <= 10.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, y_scale) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
        VAddCMicrokernelTester()
          .batch_size(batch_size)
          .y_scale(y_scale)
          .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      }
    }
  }

  // Batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmin set to 128.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, qmin) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmin(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }

  // Batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmax set to 128.
  TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X8, qmax) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VAddCMicrokernelTester()
        .batch_size(batch_size)
        .qmax(128)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2660 
2661 
2662 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,batch_eq_16)2663   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_eq_16) {
2664     TEST_REQUIRES_X86_SSE41;
2665     VAddCMicrokernelTester()
2666       .batch_size(16)
2667       .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2668   }
2669 
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,batch_div_16)2670   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_div_16) {
2671     TEST_REQUIRES_X86_SSE41;
2672     for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
2673       VAddCMicrokernelTester()
2674         .batch_size(batch_size)
2675         .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2676     }
2677   }
2678 
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,batch_lt_16)2679   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_lt_16) {
2680     TEST_REQUIRES_X86_SSE41;
2681     for (size_t batch_size = 1; batch_size < 16; batch_size++) {
2682       VAddCMicrokernelTester()
2683         .batch_size(batch_size)
2684         .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2685     }
2686   }
2687 
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,batch_gt_16)2688   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, batch_gt_16) {
2689     TEST_REQUIRES_X86_SSE41;
2690     for (size_t batch_size = 17; batch_size < 32; batch_size++) {
2691       VAddCMicrokernelTester()
2692         .batch_size(batch_size)
2693         .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2694     }
2695   }
2696 
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,inplace)2697   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, inplace) {
2698     TEST_REQUIRES_X86_SSE41;
2699     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2700       VAddCMicrokernelTester()
2701         .batch_size(batch_size)
2702         .inplace(true)
2703         .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2704     }
2705   }
2706 
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,a_zero_point)2707   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, a_zero_point) {
2708     TEST_REQUIRES_X86_SSE41;
2709     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2710       for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
2711         VAddCMicrokernelTester()
2712           .batch_size(batch_size)
2713           .a_zero_point(a_zero_point)
2714           .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2715       }
2716     }
2717   }
2718 
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,b_zero_point)2719   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, b_zero_point) {
2720     TEST_REQUIRES_X86_SSE41;
2721     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2722       for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
2723         VAddCMicrokernelTester()
2724           .batch_size(batch_size)
2725           .b_zero_point(b_zero_point)
2726           .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2727       }
2728     }
2729   }
2730 
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,y_zero_point)2731   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, y_zero_point) {
2732     TEST_REQUIRES_X86_SSE41;
2733     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2734       for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
2735         VAddCMicrokernelTester()
2736           .batch_size(batch_size)
2737           .y_zero_point(y_zero_point)
2738           .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2739       }
2740     }
2741   }
2742 
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,a_scale)2743   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, a_scale) {
2744     TEST_REQUIRES_X86_SSE41;
2745     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2746       for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
2747         VAddCMicrokernelTester()
2748           .batch_size(batch_size)
2749           .a_scale(a_scale)
2750           .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2751       }
2752     }
2753   }
2754 
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,b_scale)2755   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, b_scale) {
2756     TEST_REQUIRES_X86_SSE41;
2757     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2758       for (float b_scale = 0.1f; b_scale <= 10.0f; b_scale *= 3.14f) {
2759         VAddCMicrokernelTester()
2760           .batch_size(batch_size)
2761           .b_scale(b_scale)
2762           .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2763       }
2764     }
2765   }
2766 
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16,y_scale)2767   TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, y_scale) {
2768     TEST_REQUIRES_X86_SSE41;
2769     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2770       for (float y_scale = 0.1f; y_scale <= 10.0f; y_scale *= 3.14f) {
2771         VAddCMicrokernelTester()
2772           .batch_size(batch_size)
2773           .y_scale(y_scale)
2774           .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
2775       }
2776     }
2777   }
2778 
// Runs batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
2788 
// Runs batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
2798 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2799 
2800 
2801 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a batch of exactly 24 elements.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  const size_t n = 24;
  VAddCMicrokernelTester()
    .batch_size(n)
    .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
2808 
// Runs batch sizes 48, 72, ..., 216: every multiple of 24 in [48, 240).
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 48;
  while (n < 240) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 24;
  }
}
2817 
// Runs every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 23; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2826 
// Runs every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 25; n <= 47; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2835 
// Runs batch sizes 1, 24, 47, 70, 93, 116 with the tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
2845 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps a_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
2857 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps b_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
2869 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps y_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
2881 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps a_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
2893 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps b_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
2905 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps y_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 120; n += 23) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
2917 
// Runs batch sizes 1, 24, 47, 70, 93, 116 with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
2927 
// Runs batch sizes 1, 24, 47, 70, 93, 116 with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 120) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
2937 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2938 
2939 
2940 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a batch of exactly 32 elements.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_SSE41;
  const size_t n = 32;
  VAddCMicrokernelTester()
    .batch_size(n)
    .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
2947 
// Runs batch sizes 64, 96, ..., 288: every multiple of 32 in [64, 320).
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 64;
  while (n < 320) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 32;
  }
}
2956 
// Runs every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 31; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2965 
// Runs every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 33; n <= 63; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
2974 
// Runs batch sizes 1, 32, 63, 94, 125, 156 with the tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, inplace) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 31;
  }
}
2984 
// For batch sizes 1, 32, 63, 94, 125, 156, sweeps a_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
2996 
// For batch sizes 1, 32, 63, 94, 125, 156, sweeps b_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3008 
// For batch sizes 1, 32, 63, 94, 125, 156, sweeps y_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3020 
// For batch sizes 1, 32, 63, 94, 125, 156, sweeps a_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3032 
// For batch sizes 1, 32, 63, 94, 125, 156, sweeps b_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3044 
// For batch sizes 1, 32, 63, 94, 125, 156, sweeps y_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t n = 1; n <= 160; n += 31) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3056 
// Runs batch sizes 1, 32, 63, 94, 125, 156 with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 31;
  }
}
3066 
// Runs batch sizes 1, 32, 63, 94, 125, 156 with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__SSE41_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_SSE41;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 31;
  }
}
3076 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3077 
3078 
3079 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a batch of exactly 8 elements.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  const size_t n = 8;
  VAddCMicrokernelTester()
    .batch_size(n)
    .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3086 
// Runs batch sizes 16, 24, ..., 72: every multiple of 8 in [16, 80).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 16;
  while (n < 80) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 8;
  }
}
3095 
// Runs every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 7; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3104 
// Runs every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 9; n <= 15; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3113 
// Runs batch sizes 1, 8, 15, 22, 29, 36 with the tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 7;
  }
}
3123 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps a_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3135 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps b_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3147 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps y_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3159 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps a_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3171 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps b_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3183 
// For batch sizes 1, 8, 15, 22, 29, 36, sweeps y_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 40; n += 7) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3195 
// Runs batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 7;
  }
}
3205 
// Runs batch sizes 1, 8, 15, 22, 29, 36 with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 7;
  }
}
3215 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3216 
3217 
3218 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a batch of exactly 16 elements.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  const size_t n = 16;
  VAddCMicrokernelTester()
    .batch_size(n)
    .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3225 
// Runs batch sizes 32, 48, ..., 144: every multiple of 16 in [32, 160).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 32;
  while (n < 160) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 16;
  }
}
3234 
// Runs every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 15; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3243 
// Runs every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n <= 31; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3252 
// Runs batch sizes 1, 16, 31, 46, 61, 76 with the tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
3262 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps a_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3274 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps b_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3286 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps y_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3298 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps a_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3310 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps b_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3322 
// For batch sizes 1, 16, 31, 46, 61, 76, sweeps y_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3334 
// Runs batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
3344 
// Runs batch sizes 1, 16, 31, 46, 61, 76 with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
3354 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3355 
3356 
3357 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run on a batch of exactly 24 elements.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  const size_t n = 24;
  VAddCMicrokernelTester()
    .batch_size(n)
    .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3364 
// Runs batch sizes 48, 72, ..., 216: every multiple of 24 in [48, 240).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 48;
  while (n < 240) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 24;
  }
}
3373 
// Runs every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 23; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3382 
// Runs every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 25; n <= 47; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3391 
// Runs batch sizes 1, 24, 47, 70, 93, 116 with the tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 120) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
3401 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps a_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3413 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps b_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3425 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps y_zero_point over
// -128, -77, -26, 25, 76, 127 (step 51).
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    int32_t zero_point = -128;
    while (zero_point <= 127) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      zero_point += 51;
    }
  }
}
3437 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps a_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3449 
// For batch sizes 1, 24, 47, 70, 93, 116, sweeps b_scale geometrically:
// it starts at 0.1f and is multiplied by 3.14f for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    float scale = 0.1f;
    while (scale <= 10.0f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
      scale *= 3.14f;
    }
  }
}
3461 
// Sweeps y_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 24, 47, 70, 93, 116 on the
// avx_mul32_ld32_x24 kernel.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3473 
// Runs the avx_mul32_ld32_x24 kernel with qmin(128) on the batch sizes
// 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3483 
// Runs the avx_mul32_ld32_x24 kernel with qmax(128) on the batch sizes
// 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3493 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3494 
3495 
3496 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the avx_mul32_ld32_x32 kernel once with a batch size of exactly 32.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  const size_t exact_batch = 32;
  VAddCMicrokernelTester()
    .batch_size(exact_batch)
    .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3503 
// Runs the avx_mul32_ld32_x32 kernel on the batch sizes 64, 96, ..., 288,
// i.e. 2x through 9x multiples of 32.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  const size_t step = 32;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester()
      .batch_size(multiple * step)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3512 
// Runs the avx_mul32_ld32_x32 kernel on every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 31; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3521 
// Runs the avx_mul32_ld32_x32 kernel on every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  const size_t base = 32;
  for (size_t extra = 1; extra < base; ++extra) {
    VAddCMicrokernelTester()
      .batch_size(base + extra)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3530 
// Runs the avx_mul32_ld32_x32 kernel with inplace(true) on the batch sizes
// 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 31;
  }
}
3540 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 32, 63, 94, 125, 156 on the avx_mul32_ld32_x32 kernel.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3552 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 32, 63, 94, 125, 156 on the avx_mul32_ld32_x32 kernel.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3564 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 32, 63, 94, 125, 156 on the avx_mul32_ld32_x32 kernel.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3576 
// Sweeps a_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 32, 63, 94, 125, 156 on the
// avx_mul32_ld32_x32 kernel.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3588 
// Sweeps b_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 32, 63, 94, 125, 156 on the
// avx_mul32_ld32_x32 kernel.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3600 
// Sweeps y_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 32, 63, 94, 125, 156 on the
// avx_mul32_ld32_x32 kernel.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3612 
// Runs the avx_mul32_ld32_x32 kernel with qmin(128) on the batch sizes
// 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3622 
// Runs the avx_mul32_ld32_x32 kernel with qmax(128) on the batch sizes
// 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__AVX_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3632 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3633 
3634 
3635 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the xop_mul32_ld32_x8 kernel once with a batch size of exactly 8.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_eq_8) {
  TEST_REQUIRES_X86_XOP;
  const size_t exact_batch = 8;
  VAddCMicrokernelTester()
    .batch_size(exact_batch)
    .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3642 
// Runs the xop_mul32_ld32_x8 kernel on the batch sizes 16, 24, ..., 72,
// i.e. 2x through 9x multiples of 8.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_div_8) {
  TEST_REQUIRES_X86_XOP;
  const size_t step = 8;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester()
      .batch_size(multiple * step)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3651 
// Runs the xop_mul32_ld32_x8 kernel on every batch size from 1 through 7.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 7; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3660 
// Runs the xop_mul32_ld32_x8 kernel on every batch size from 9 through 15.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, batch_gt_8) {
  TEST_REQUIRES_X86_XOP;
  const size_t base = 8;
  for (size_t extra = 1; extra < base; ++extra) {
    VAddCMicrokernelTester()
      .batch_size(base + extra)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3669 
// Runs the xop_mul32_ld32_x8 kernel with inplace(true) on the batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, inplace) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 40) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 7;
  }
}
3679 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 8, 15, 22, 29, 36 on the xop_mul32_ld32_x8 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3691 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 8, 15, 22, 29, 36 on the xop_mul32_ld32_x8 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3703 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 8, 15, 22, 29, 36 on the xop_mul32_ld32_x8 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3715 
// Sweeps a_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 8, 15, 22, 29, 36 on the
// xop_mul32_ld32_x8 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, a_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3727 
// Sweeps b_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 8, 15, 22, 29, 36 on the
// xop_mul32_ld32_x8 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, b_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3739 
// Sweeps y_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 8, 15, 22, 29, 36 on the
// xop_mul32_ld32_x8 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, y_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3751 
// Runs the xop_mul32_ld32_x8 kernel with qmin(128) on the batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3761 
// Runs the xop_mul32_ld32_x8 kernel with qmax(128) on the batch sizes
// 1, 8, 15, 22, 29, 36.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X8, qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3771 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3772 
3773 
3774 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the xop_mul32_ld32_x16 kernel once with a batch size of exactly 16.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_eq_16) {
  TEST_REQUIRES_X86_XOP;
  const size_t exact_batch = 16;
  VAddCMicrokernelTester()
    .batch_size(exact_batch)
    .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3781 
// Runs the xop_mul32_ld32_x16 kernel on the batch sizes 32, 48, ..., 144,
// i.e. 2x through 9x multiples of 16.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_div_16) {
  TEST_REQUIRES_X86_XOP;
  const size_t step = 16;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester()
      .batch_size(multiple * step)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3790 
// Runs the xop_mul32_ld32_x16 kernel on every batch size from 1 through 15.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 15; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3799 
// Runs the xop_mul32_ld32_x16 kernel on every batch size from 17 through 31.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, batch_gt_16) {
  TEST_REQUIRES_X86_XOP;
  const size_t base = 16;
  for (size_t extra = 1; extra < base; ++extra) {
    VAddCMicrokernelTester()
      .batch_size(base + extra)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3808 
// Runs the xop_mul32_ld32_x16 kernel with inplace(true) on the batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, inplace) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 80) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 15;
  }
}
3818 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 16, 31, 46, 61, 76 on the xop_mul32_ld32_x16 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3830 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 16, 31, 46, 61, 76 on the xop_mul32_ld32_x16 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3842 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 16, 31, 46, 61, 76 on the xop_mul32_ld32_x16 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3854 
// Sweeps a_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 16, 31, 46, 61, 76 on the
// xop_mul32_ld32_x16 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, a_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3866 
// Sweeps b_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 16, 31, 46, 61, 76 on the
// xop_mul32_ld32_x16 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, b_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3878 
// Sweeps y_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 16, 31, 46, 61, 76 on the
// xop_mul32_ld32_x16 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, y_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3890 
// Runs the xop_mul32_ld32_x16 kernel with qmin(128) on the batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3900 
// Runs the xop_mul32_ld32_x16 kernel with qmax(128) on the batch sizes
// 1, 16, 31, 46, 61, 76.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X16, qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3910 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3911 
3912 
3913 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the xop_mul32_ld32_x24 kernel once with a batch size of exactly 24.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_eq_24) {
  TEST_REQUIRES_X86_XOP;
  const size_t exact_batch = 24;
  VAddCMicrokernelTester()
    .batch_size(exact_batch)
    .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
3920 
// Runs the xop_mul32_ld32_x24 kernel on the batch sizes 48, 72, ..., 216,
// i.e. 2x through 9x multiples of 24.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_div_24) {
  TEST_REQUIRES_X86_XOP;
  const size_t step = 24;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester()
      .batch_size(multiple * step)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3929 
// Runs the xop_mul32_ld32_x24 kernel on every batch size from 1 through 23.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_lt_24) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 23; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3938 
// Runs the xop_mul32_ld32_x24 kernel on every batch size from 25 through 47.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, batch_gt_24) {
  TEST_REQUIRES_X86_XOP;
  const size_t base = 24;
  for (size_t extra = 1; extra < base; ++extra) {
    VAddCMicrokernelTester()
      .batch_size(base + extra)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
3947 
// Runs the xop_mul32_ld32_x24 kernel with inplace(true) on the batch sizes
// 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, inplace) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 120) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 23;
  }
}
3957 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 24, 47, 70, 93, 116 on the xop_mul32_ld32_x24 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3969 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 24, 47, 70, 93, 116 on the xop_mul32_ld32_x24 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3981 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 24, 47, 70, 93, 116 on the xop_mul32_ld32_x24 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
3993 
// Sweeps a_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 24, 47, 70, 93, 116 on the
// xop_mul32_ld32_x24 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, a_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4005 
// Sweeps b_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 24, 47, 70, 93, 116 on the
// xop_mul32_ld32_x24 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, b_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4017 
// Sweeps y_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 24, 47, 70, 93, 116 on the
// xop_mul32_ld32_x24 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, y_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4029 
// Runs the xop_mul32_ld32_x24 kernel with qmin(128) on the batch sizes
// 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmin(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4039 
// Runs the xop_mul32_ld32_x24 kernel with qmax(128) on the batch sizes
// 1, 24, 47, 70, 93, 116.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X24, qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .qmax(128)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4049 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4050 
4051 
4052 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the xop_mul32_ld32_x32 kernel once with a batch size of exactly 32.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_eq_32) {
  TEST_REQUIRES_X86_XOP;
  const size_t exact_batch = 32;
  VAddCMicrokernelTester()
    .batch_size(exact_batch)
    .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
}
4059 
// Runs the xop_mul32_ld32_x32 kernel on the batch sizes 64, 96, ..., 288,
// i.e. 2x through 9x multiples of 32.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_div_32) {
  TEST_REQUIRES_X86_XOP;
  const size_t step = 32;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VAddCMicrokernelTester()
      .batch_size(multiple * step)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4068 
// Runs the xop_mul32_ld32_x32 kernel on every batch size from 1 through 31.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_lt_32) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 31; ++n) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4077 
// Runs the xop_mul32_ld32_x32 kernel on every batch size from 33 through 63.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, batch_gt_32) {
  TEST_REQUIRES_X86_XOP;
  const size_t base = 32;
  for (size_t extra = 1; extra < base; ++extra) {
    VAddCMicrokernelTester()
      .batch_size(base + extra)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4086 
// Runs the xop_mul32_ld32_x32 kernel with inplace(true) on the batch sizes
// 1, 32, 63, 94, 125, 156.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, inplace) {
  TEST_REQUIRES_X86_XOP;
  size_t n = 1;
  while (n <= 160) {
    VAddCMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    n += 31;
  }
}
4096 
// Sweeps a_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 32, 63, 94, 125, 156 on the xop_mul32_ld32_x32 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, a_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4108 
// Sweeps b_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 32, 63, 94, 125, 156 on the xop_mul32_ld32_x32 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, b_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .b_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4120 
// Sweeps y_zero_point over -128, -77, -26, 25, 76, 127 for each of the
// batch sizes 1, 32, 63, 94, 125, 156 on the xop_mul32_ld32_x32 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, y_zero_point) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .y_zero_point(zero_point)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4132 
// Sweeps a_scale geometrically (start 0.1f, multiply by 3.14f while the value
// stays <= 10.0f) for each of the batch sizes 1, 32, 63, 94, 125, 156 on the
// xop_mul32_ld32_x32 kernel.
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, a_scale) {
  TEST_REQUIRES_X86_XOP;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester()
        .batch_size(n)
        .a_scale(scale)
        .Test(xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4144 
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, b_scale) {
  TEST_REQUIRES_X86_XOP;
  // For each sampled batch size, b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4156 
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, y_scale) {
  TEST_REQUIRES_X86_XOP;
  // For each sampled batch size, y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
    }
  }
}
4168 
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Each sampled batch size is run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4178 
TEST(QS8_VADDC_MINMAX__XOP_MUL32_LD32_X32, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Each sampled batch size is run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32, xnn_init_qs8_add_minmax_sse4_mul32_params);
  }
}
4188 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4189 
4190 
4191 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with batch_size fixed at 8.
  VAddCMicrokernelTester().batch_size(8).Test(
    xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
}
4198 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 8 only: 16, 24, ..., 72.
  for (size_t n = 16; n < 80; n += 8) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4207 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 7.
  for (size_t n = 1; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4216 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 9 through 15.
  for (size_t n = 9; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4225 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes start at 1 and advance by 7 while <= 40, each with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4235 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step a_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4247 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step b_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4259 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step y_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 40; n += 7) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4271 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4283 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4295 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 40; n += 7) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4307 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Each sampled batch size is run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4317 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X8, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Each sampled batch size is run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 40; n += 7) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4327 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4328 
4329 
4330 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with batch_size fixed at 16.
  VAddCMicrokernelTester().batch_size(16).Test(
    xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
}
4337 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 16 only: 32, 48, ..., 144.
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4346 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 15.
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4355 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4364 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes start at 1 and advance by 15 while <= 80, each with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4374 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step a_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4386 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step b_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4398 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step y_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4410 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4422 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4434 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4446 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Each sampled batch size is run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4456 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X16, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Each sampled batch size is run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4466 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4467 
4468 
4469 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with batch_size fixed at 24.
  VAddCMicrokernelTester().batch_size(24).Test(
    xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
}
4476 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 24 only: 48, 72, ..., 216.
  for (size_t n = 48; n < 240; n += 24) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4485 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 23.
  for (size_t n = 1; n < 24; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4494 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 25 through 47.
  for (size_t n = 25; n < 48; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4503 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes start at 1 and advance by 23 while <= 120, each with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4513 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step a_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4525 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step b_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4537 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step y_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 120; n += 23) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4549 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4561 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4573 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 120; n += 23) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4585 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Each sampled batch size is run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4595 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X24, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Each sampled batch size is run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 120; n += 23) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x24, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4605 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4606 
4607 
4608 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with batch_size fixed at 32.
  VAddCMicrokernelTester().batch_size(32).Test(
    xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
}
4615 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  // Multiples of 32 only: 64, 96, ..., 288.
  for (size_t n = 64; n < 320; n += 32) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4624 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 31.
  for (size_t n = 1; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4633 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 33 through 63.
  for (size_t n = 33; n < 64; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4642 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes start at 1 and advance by 31 while <= 160, each with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4652 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, a_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step a_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4664 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, b_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step b_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4676 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, y_zero_point) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, step y_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 160; n += 31) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4688 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, a_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, a_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4700 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, b_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, b_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4712 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, y_scale) {
  TEST_REQUIRES_X86_AVX2;
  // For each sampled batch size, y_scale starts at 0.1f and is multiplied by 3.14f while it stays <= 10.0f.
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
    }
  }
}
4724 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Each sampled batch size is run with the tester's qmin set to 128.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4734 
TEST(QS8_VADDC_MINMAX__AVX2_MUL32_LD64_X32, qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Each sampled batch size is run with the tester's qmax set to 128.
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32, xnn_init_qs8_add_minmax_avx2_params);
  }
}
4744 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4745 
4746 
4747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Single run with batch_size fixed at 16.
  VAddCMicrokernelTester().batch_size(16).Test(
    xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
}
4754 
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Multiples of 16 only: 32, 48, ..., 144.
  for (size_t n = 32; n < 160; n += 16) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
4763 
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every batch size from 1 through 15.
  for (size_t n = 1; n < 16; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
4772 
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every batch size from 17 through 31.
  for (size_t n = 17; n < 32; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
4781 
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, inplace) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Batch sizes start at 1 and advance by 15 while <= 80, each with the tester's inplace flag enabled.
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
4791 
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, a_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  // For each sampled batch size, step a_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4803 
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, b_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  // For each sampled batch size, step b_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4815 
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, y_zero_point) {
  TEST_REQUIRES_X86_AVX512SKX;
  // For each sampled batch size, step y_zero_point from -128 to 127 in increments of 51.
  for (size_t n = 1; n <= 80; n += 15) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4827 
// Sweeps a_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch sizes 1..80 (step 15).
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, a_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4839 
// Sweeps b_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch sizes 1..80 (step 15).
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, b_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4851 
// Sweeps y_scale geometrically (start 0.1, factor 3.14, while <= 10) for batch sizes 1..80 (step 15).
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, y_scale) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
}
4863 
// Runs with qmin(128) for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
4873 
// Runs with qmax(128) for batch sizes from 1 up to 80 in steps of 15.
TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X16, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t n = 1; n <= 80; n += 15) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16, xnn_init_qs8_add_minmax_avx512_params);
  }
}
4883 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4884 
4885 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32.
  // Every test starts with TEST_REQUIRES_X86_AVX512SKX and drives the kernel
  // through VAddCMicrokernelTester with xnn_init_qs8_add_minmax_avx512_params.

  // Batch size equal to 32.
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    VAddCMicrokernelTester().batch_size(32).Test(
        xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
  }

  // Batch sizes that are multiples of 32: 64, 96, ..., below 320.
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 64; n < 320; n += 32) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }

  // Batch sizes below 32: 1 through 31.
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n < 32; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }

  // Batch sizes just above 32: 33 through 63.
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 33; n < 64; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }

  // In-place run: batch sizes from 1 up to 160 in steps of 31, with inplace(true).
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, inplace) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }

  // Sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, a_zero_point) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
      }
    }
  }

  // Sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, b_zero_point) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
      }
    }
  }

  // Sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, y_zero_point) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
      }
    }
  }

  // Sweeps a_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, a_scale) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
      }
    }
  }

  // Sweeps b_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, b_scale) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
      }
    }
  }

  // Sweeps y_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, y_scale) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
      }
    }
  }

  // Runs with qmin(128).
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, qmin) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }

  // Runs with qmax(128).
  TEST(QS8_VADDC_MINMAX__AVX512SKX_MUL32_LD128_X32, qmax) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t n = 1; n <= 160; n += 31) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32, xnn_init_qs8_add_minmax_avx512_params);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5023 
5024 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Tests for xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, driven through
  // VAddCMicrokernelTester with xnn_init_qs8_add_minmax_wasmsimd_params.

  // Batch size equal to 8.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_eq_8) {
    VAddCMicrokernelTester().batch_size(8).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
  }

  // Batch sizes that are multiples of 8: 16, 24, ..., below 80.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_div_8) {
    for (size_t n = 16; n < 80; n += 8) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Batch sizes below 8: 1 through 7.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_lt_8) {
    for (size_t n = 1; n < 8; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Batch sizes just above 8: 9 through 15.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, batch_gt_8) {
    for (size_t n = 9; n < 16; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // In-place run: batch sizes from 1 up to 40 in steps of 7, with inplace(true).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, inplace) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, a_zero_point) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, b_zero_point) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, y_zero_point) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps a_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, a_scale) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps b_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, b_scale) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps y_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, y_scale) {
    for (size_t n = 1; n <= 40; n += 7) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Runs with qmin(128).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, qmin) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Runs with qmax(128).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X8, qmax) {
    for (size_t n = 1; n <= 40; n += 7) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5149 
5150 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Tests for xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, driven through
  // VAddCMicrokernelTester with xnn_init_qs8_add_minmax_wasmsimd_params.

  // Batch size equal to 16.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_eq_16) {
    VAddCMicrokernelTester().batch_size(16).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
  }

  // Batch sizes that are multiples of 16: 32, 48, ..., below 160.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_div_16) {
    for (size_t n = 32; n < 160; n += 16) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Batch sizes below 16: 1 through 15.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_lt_16) {
    for (size_t n = 1; n < 16; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Batch sizes just above 16: 17 through 31.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, batch_gt_16) {
    for (size_t n = 17; n < 32; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // In-place run: batch sizes from 1 up to 80 in steps of 15, with inplace(true).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, inplace) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, a_zero_point) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, b_zero_point) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, y_zero_point) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps a_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, a_scale) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps b_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, b_scale) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps y_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, y_scale) {
    for (size_t n = 1; n <= 80; n += 15) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Runs with qmin(128).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, qmin) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Runs with qmax(128).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X16, qmax) {
    for (size_t n = 1; n <= 80; n += 15) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5275 
5276 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Tests for xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, driven through
  // VAddCMicrokernelTester with xnn_init_qs8_add_minmax_wasmsimd_params.

  // Batch size equal to 24.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_eq_24) {
    VAddCMicrokernelTester().batch_size(24).Test(
        xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
  }

  // Batch sizes that are multiples of 24: 48, 72, ..., below 240.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_div_24) {
    for (size_t n = 48; n < 240; n += 24) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Batch sizes below 24: 1 through 23.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_lt_24) {
    for (size_t n = 1; n < 24; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Batch sizes just above 24: 25 through 47.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, batch_gt_24) {
    for (size_t n = 25; n < 48; ++n) {
      VAddCMicrokernelTester().batch_size(n).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // In-place run: batch sizes from 1 up to 120 in steps of 23, with inplace(true).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, inplace) {
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Sweeps a_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, a_zero_point) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).a_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps b_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, b_zero_point) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).b_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps y_zero_point from -128 to 127 in steps of 51.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, y_zero_point) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (int32_t zp = -128; zp <= 127; zp += 51) {
        VAddCMicrokernelTester().batch_size(n).y_zero_point(zp).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps a_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, a_scale) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps b_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, b_scale) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Sweeps y_scale geometrically: start 0.1, factor 3.14, while <= 10.
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, y_scale) {
    for (size_t n = 1; n <= 120; n += 23) {
      for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
        VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(
            xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
      }
    }
  }

  // Runs with qmin(128).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, qmin) {
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).qmin(128).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }

  // Runs with qmax(128).
  TEST(QS8_VADDC_MINMAX__WASMSIMD_X24, qmax) {
    for (size_t n = 1; n <= 120; n += 23) {
      VAddCMicrokernelTester().batch_size(n).qmax(128).Test(
          xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x24, xnn_init_qs8_add_minmax_wasmsimd_params);
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5401 
5402 
5403 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32,batch_eq_32)5404   TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_eq_32) {
5405     VAddCMicrokernelTester()
5406       .batch_size(32)
5407       .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
5408   }
5409 
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32,batch_div_32)5410   TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_div_32) {
5411     for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
5412       VAddCMicrokernelTester()
5413         .batch_size(batch_size)
5414         .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
5415     }
5416   }
5417 
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32,batch_lt_32)5418   TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_lt_32) {
5419     for (size_t batch_size = 1; batch_size < 32; batch_size++) {
5420       VAddCMicrokernelTester()
5421         .batch_size(batch_size)
5422         .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
5423     }
5424   }
5425 
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32,batch_gt_32)5426   TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, batch_gt_32) {
5427     for (size_t batch_size = 33; batch_size < 64; batch_size++) {
5428       VAddCMicrokernelTester()
5429         .batch_size(batch_size)
5430         .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
5431     }
5432   }
5433 
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32,inplace)5434   TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, inplace) {
5435     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5436       VAddCMicrokernelTester()
5437         .batch_size(batch_size)
5438         .inplace(true)
5439         .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
5440     }
5441   }
5442 
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32,a_zero_point)5443   TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, a_zero_point) {
5444     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5445       for (int32_t a_zero_point = -128; a_zero_point <= 127; a_zero_point += 51) {
5446         VAddCMicrokernelTester()
5447           .batch_size(batch_size)
5448           .a_zero_point(a_zero_point)
5449           .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
5450       }
5451     }
5452   }
5453 
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32,b_zero_point)5454   TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, b_zero_point) {
5455     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5456       for (int32_t b_zero_point = -128; b_zero_point <= 127; b_zero_point += 51) {
5457         VAddCMicrokernelTester()
5458           .batch_size(batch_size)
5459           .b_zero_point(b_zero_point)
5460           .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
5461       }
5462     }
5463   }
5464 
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32,y_zero_point)5465   TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, y_zero_point) {
5466     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5467       for (int32_t y_zero_point = -128; y_zero_point <= 127; y_zero_point += 51) {
5468         VAddCMicrokernelTester()
5469           .batch_size(batch_size)
5470           .y_zero_point(y_zero_point)
5471           .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
5472       }
5473     }
5474   }
5475 
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32,a_scale)5476   TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, a_scale) {
5477     for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
5478       for (float a_scale = 0.1f; a_scale <= 10.0f; a_scale *= 3.14f) {
5479         VAddCMicrokernelTester()
5480           .batch_size(batch_size)
5481           .a_scale(a_scale)
5482           .Test(xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32, xnn_init_qs8_add_minmax_wasmsimd_params);
5483       }
5484     }
5485   }
5486 
// For batch sizes 1..160 (step 31), sweeps b_scale from 0.1f, multiplying by
// 3.14f each step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, b_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
5497 
// For batch sizes 1..160 (step 31), sweeps y_scale from 0.1f, multiplying by
// 3.14f each step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, y_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
5508 
// Runs batch sizes 1..160 (step 31) with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, qmin) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
5517 
// Runs batch sizes 1..160 (step 31) with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__WASMSIMD_X32, qmax) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x32;
  const auto init_params = xnn_init_qs8_add_minmax_wasmsimd_params;
  for (size_t n = 1; n <= 160; n += 31) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
5526 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5527 
5528 
// Runs the scalar_x1 kernel once with a batch size of 1.
TEST(QS8_VADDC_MINMAX__SCALAR_X1, batch_eq_1) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  VAddCMicrokernelTester().batch_size(1).Test(ukernel, init_params);
}
5534 
// Runs the scalar_x1 kernel for every batch size from 2 through 9.
TEST(QS8_VADDC_MINMAX__SCALAR_X1, batch_gt_1) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 2; n < 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5542 
// Runs batch sizes 1..5 with the tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__SCALAR_X1, inplace) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
5551 
// Crosses batch sizes 1..5 with a_zero_point values -128..127 (step 51).
TEST(QS8_VADDC_MINMAX__SCALAR_X1, a_zero_point) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5562 
// Crosses batch sizes 1..5 with b_zero_point values -128..127 (step 51).
TEST(QS8_VADDC_MINMAX__SCALAR_X1, b_zero_point) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5573 
// Crosses batch sizes 1..5 with y_zero_point values -128..127 (step 51).
TEST(QS8_VADDC_MINMAX__SCALAR_X1, y_zero_point) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5584 
// For batch sizes 1..5, sweeps a_scale from 0.1f, multiplying by 3.14f each
// step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SCALAR_X1, a_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
5595 
// For batch sizes 1..5, sweeps b_scale from 0.1f, multiplying by 3.14f each
// step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SCALAR_X1, b_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
5606 
// For batch sizes 1..5, sweeps y_scale from 0.1f, multiplying by 3.14f each
// step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SCALAR_X1, y_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
5617 
// Runs batch sizes 1..5 with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__SCALAR_X1, qmin) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
5626 
// Runs batch sizes 1..5 with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__SCALAR_X1, qmax) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x1;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 5; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
5635 
// Runs the scalar_x2 kernel once with a batch size of 2.
TEST(QS8_VADDC_MINMAX__SCALAR_X2, batch_eq_2) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  VAddCMicrokernelTester().batch_size(2).Test(ukernel, init_params);
}
5641 
// Runs the scalar_x2 kernel for the even batch sizes 4, 6, ..., 18.
TEST(QS8_VADDC_MINMAX__SCALAR_X2, batch_div_2) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 4; n < 20; n += 2) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5649 
// Runs the scalar_x2 kernel for batch sizes in [1, 2), i.e. only batch size 1.
TEST(QS8_VADDC_MINMAX__SCALAR_X2, batch_lt_2) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n < 2; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5657 
// Runs the scalar_x2 kernel for batch sizes in [3, 4), i.e. only batch size 3.
TEST(QS8_VADDC_MINMAX__SCALAR_X2, batch_gt_2) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 3; n < 4; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5665 
// Runs batch sizes 1..10 with the tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__SCALAR_X2, inplace) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
5674 
// Crosses batch sizes 1..10 with a_zero_point values -128..127 (step 51).
TEST(QS8_VADDC_MINMAX__SCALAR_X2, a_zero_point) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5685 
// Crosses batch sizes 1..10 with b_zero_point values -128..127 (step 51).
TEST(QS8_VADDC_MINMAX__SCALAR_X2, b_zero_point) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5696 
// Crosses batch sizes 1..10 with y_zero_point values -128..127 (step 51).
TEST(QS8_VADDC_MINMAX__SCALAR_X2, y_zero_point) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5707 
// For batch sizes 1..10, sweeps a_scale from 0.1f, multiplying by 3.14f each
// step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SCALAR_X2, a_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
5718 
// For batch sizes 1..10, sweeps b_scale from 0.1f, multiplying by 3.14f each
// step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SCALAR_X2, b_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
5729 
// For batch sizes 1..10, sweeps y_scale from 0.1f, multiplying by 3.14f each
// step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SCALAR_X2, y_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
5740 
// Runs batch sizes 1..10 with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__SCALAR_X2, qmin) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
5749 
// Runs batch sizes 1..10 with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__SCALAR_X2, qmax) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x2;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 10; ++n) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}
5758 
// Runs the scalar_x4 kernel once with a batch size of 4.
TEST(QS8_VADDC_MINMAX__SCALAR_X4, batch_eq_4) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  VAddCMicrokernelTester().batch_size(4).Test(ukernel, init_params);
}
5764 
// Runs the scalar_x4 kernel for the batch sizes 8, 12, ..., 36.
TEST(QS8_VADDC_MINMAX__SCALAR_X4, batch_div_4) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 8; n < 40; n += 4) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5772 
// Runs the scalar_x4 kernel for batch sizes 1, 2 and 3.
TEST(QS8_VADDC_MINMAX__SCALAR_X4, batch_lt_4) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n < 4; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5780 
// Runs the scalar_x4 kernel for batch sizes 5, 6 and 7.
TEST(QS8_VADDC_MINMAX__SCALAR_X4, batch_gt_4) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 5; n < 8; ++n) {
    VAddCMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
5788 
// Runs batch sizes 1..20 (step 3) with the tester's inplace flag set to true.
TEST(QS8_VADDC_MINMAX__SCALAR_X4, inplace) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    VAddCMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
5797 
// Crosses batch sizes 1..20 (step 3) with a_zero_point values -128..127 (step 51).
TEST(QS8_VADDC_MINMAX__SCALAR_X4, a_zero_point) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).a_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5808 
// Crosses batch sizes 1..20 (step 3) with b_zero_point values -128..127 (step 51).
TEST(QS8_VADDC_MINMAX__SCALAR_X4, b_zero_point) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).b_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5819 
// Crosses batch sizes 1..20 (step 3) with y_zero_point values -128..127 (step 51).
TEST(QS8_VADDC_MINMAX__SCALAR_X4, y_zero_point) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (int32_t zero_point = -128; zero_point <= 127; zero_point += 51) {
      VAddCMicrokernelTester().batch_size(n).y_zero_point(zero_point).Test(ukernel, init_params);
    }
  }
}
5830 
// For batch sizes 1..20 (step 3), sweeps a_scale from 0.1f, multiplying by
// 3.14f each step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SCALAR_X4, a_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).a_scale(scale).Test(ukernel, init_params);
    }
  }
}
5841 
// For batch sizes 1..20 (step 3), sweeps b_scale from 0.1f, multiplying by
// 3.14f each step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SCALAR_X4, b_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).b_scale(scale).Test(ukernel, init_params);
    }
  }
}
5852 
// For batch sizes 1..20 (step 3), sweeps y_scale from 0.1f, multiplying by
// 3.14f each step for as long as it stays <= 10.0f.
TEST(QS8_VADDC_MINMAX__SCALAR_X4, y_scale) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    for (float scale = 0.1f; scale <= 10.0f; scale *= 3.14f) {
      VAddCMicrokernelTester().batch_size(n).y_scale(scale).Test(ukernel, init_params);
    }
  }
}
5863 
// Runs batch sizes 1..20 (step 3) with the tester's qmin set to 128.
TEST(QS8_VADDC_MINMAX__SCALAR_X4, qmin) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    VAddCMicrokernelTester().batch_size(n).qmin(128).Test(ukernel, init_params);
  }
}
5872 
// Runs batch sizes 1..20 (step 3) with the tester's qmax set to 128.
TEST(QS8_VADDC_MINMAX__SCALAR_X4, qmax) {
  const auto ukernel = xnn_qs8_vaddc_minmax_ukernel__scalar_x4;
  const auto init_params = xnn_init_qs8_add_minmax_scalar_params;
  for (size_t n = 1; n <= 20; n += 3) {
    VAddCMicrokernelTester().batch_size(n).qmax(128).Test(ukernel, init_params);
  }
}