1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qu8-dwconv-minmax-fp32.yaml
11 // Generator: tools/generate-dwconv-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18
19 #include <xnnpack/dwconv.h>
20 #include "dwconv-microkernel-tester.h"
21
22
23 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(8), i.e. the same value passed to cr(8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
}
32
// Channel counts 16, 40, 64, 88, 112 -- each a multiple of 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
43
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
55
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
67
// Channel counts 1 through 7, all below 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
78
// Channel counts 9 through 15, strictly between 8 and 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
89
// Channel counts 9 through 15 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
101
// Channel counts 9 through 15 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
113
// width(3) runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
125
// width(3) runs over channel counts 1, 8, ..., 36, each combined with
// step values 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
140
// width(5) runs with output_stride(43) over channel counts 1, 8, 15, 22, 29,
// 36; every swept count is below the stride of 43.
// The loop variable is forwarded to channels(). Previously channels(8) was
// hard-coded, so the loop variable was unused and all six iterations ran the
// same configuration.
// NOTE(review): this file is marked auto-generated by
// tools/generate-dwconv-test.py; the generator template presumably needs the
// same change -- confirm.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
153
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
166
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
179
// width(3) runs over channel counts 1, 8, ..., 36 with input_zero_point(255)
// and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
193
// width(3) runs over channel counts 1, 8, ..., 36 with input_zero_point(0)
// and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
207
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
219
// For each zero_index in [0, 9), runs the multiple-of-8 channel sweep
// (16, 40, 64, 88, 112) with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c)
          .input_offset(176).zero_index(zi).Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neon_mul16,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
234 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
235
236
237 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(8), i.e. the same value passed to cr(8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
246
// Channel counts 16, 40, 64, 88, 112 -- each a multiple of 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
257
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
269
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
281
// Channel counts 1 through 7, all below 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
292
// Channel counts 9 through 15, strictly between 8 and 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
303
// Channel counts 9 through 15 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
315
// Channel counts 9 through 15 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
327
// width(3) runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
339
// width(3) runs over channel counts 1, 8, ..., 36, each combined with
// step values 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
354
// width(5) runs with output_stride(43) over channel counts 1, 8, 15, 22, 29,
// 36; every swept count is below the stride of 43.
// The loop variable is forwarded to channels(). Previously channels(8) was
// hard-coded, so the loop variable was unused and all six iterations ran the
// same configuration.
// NOTE(review): this file is marked auto-generated by
// tools/generate-dwconv-test.py; the generator template presumably needs the
// same change -- confirm.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
367
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
380
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
393
// width(3) runs over channel counts 1, 8, ..., 36 with input_zero_point(255)
// and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
407
// width(3) runs over channel counts 1, 8, ..., 36 with input_zero_point(0)
// and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
421
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
433
// For each zero_index in [0, 9), runs the multiple-of-8 channel sweep
// (16, 40, 64, 88, 112) with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c)
          .input_offset(176).zero_index(zi).Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__neonv8_mul16,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
448 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
449
450
451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(8), i.e. the same value passed to cr(8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
}
460
// Channel counts 16, 40, 64, 88, 112 -- each a multiple of 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
471
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
483
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
495
// Channel counts 1 through 7, all below 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
506
// Channel counts 9 through 15, strictly between 8 and 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
517
// Channel counts 9 through 15 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
529
// Channel counts 9 through 15 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
541
// width(3) runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
553
// width(3) runs over channel counts 1, 8, ..., 36, each combined with
// step values 2 through 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
568
// width(5) runs with output_stride(43) over channel counts 1, 8, 15, 22, 29,
// 36; every swept count is below the stride of 43.
// The loop variable is forwarded to channels(). Previously channels(8) was
// hard-coded, so the loop variable was unused and all six iterations ran the
// same configuration.
// NOTE(review): this file is marked auto-generated by
// tools/generate-dwconv-test.py; the generator template presumably needs the
// same change -- confirm.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
581
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
594
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
607
// width(3) runs over channel counts 1, 8, ..., 36 with input_zero_point(255)
// and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
621
// width(3) runs over channel counts 1, 8, ..., 36 with input_zero_point(0)
// and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
635
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
647
// For each zero_index in [0, 25), runs the multiple-of-8 channel sweep
// (16, 40, 64, 88, 112) with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c)
          .input_offset(176).zero_index(zi).Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neon_mul16,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
662 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
663
664
665 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(8), i.e. the same value passed to cr(8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
674
// Channel counts 16, 40, 64, 88, 112 -- each a multiple of 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
685
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
697
// Multiple-of-8 channel sweep (16, 40, 64, 88, 112) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
709
// Channel counts 1 through 7, all below 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
720
// Channel counts 9 through 15, strictly between 8 and 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
731
// Channel counts 9 through 15 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
743
// Channel counts 9 through 15 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
755
// width(3) runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
767
// width(3) runs over channel counts 1, 8, ..., 36, each combined with
// step values 2 through 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(s).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
782
// width(5) runs with output_stride(43) over channel counts 1, 8, 15, 22, 29,
// 36; every swept count is below the stride of 43.
// The loop variable is forwarded to channels(). Previously channels(8) was
// hard-coded, so the loop variable was unused and all six iterations ran the
// same configuration.
// NOTE(review): this file is marked auto-generated by
// tools/generate-dwconv-test.py; the generator template presumably needs the
// same change -- confirm.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
795
// width(3) runs over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
808
// width(3) runs over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
821
// width(3) runs over channel counts 1, 8, ..., 36 with input_zero_point(255)
// and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
835
// width(3) runs over channel counts 1, 8, ..., 36 with input_zero_point(0)
// and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
849
// Channel counts 16, 40, 64, 88, 112 with input_offset = 176 (cr = 8, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 16;
  while (c < 128) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 24;
  }
}
861
// For every zero_index 0..24, runs channel counts 16, 40, 64, 88, 112 with
// input_offset = 176 (cr = 8, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
876 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
877
878
879 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr value of 16 (kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
    xnn_init_qu8_conv_minmax_fp32_neon_params,
    xnn_qu8_requantize_fp32);
}
888
// Channel counts 32, 80, 128, 176, 224, all multiples of 16 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
899
// Channel counts 32, 80, 128, 176, 224 with qmin = 128 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
911
// Channel counts 32, 80, 128, 176, 224 with qmax = 128 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
923
// Every channel count from 1 through 15, i.e. below cr = 16 (kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 1;
  while (c < 16) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
934
// Every channel count from 17 through 31, i.e. above cr = 16 (kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
945
// Every channel count from 17 through 31 with qmin = 128 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
957
// Every channel count from 17 through 31 with qmax = 128 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
969
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
981
// For channel counts 1, 16, 31, 46, 61, 76, runs width = 3 with every step
// value from 2 through 9 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
996
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width = 5 and an
// output_stride of 83, which is larger than every channel count in the sweep
// (cr = 16, kr = 9).
// The loop variable is passed to channels() so that each iteration tests a
// distinct channel count rather than repeating the 16-channel configuration.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
1009
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3 and qmin = 128
// (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1022
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3 and qmax = 128
// (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1035
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3, input_zero_point = 255
// and kernel_zero_point = 0 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1049
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3, input_zero_point = 0
// and kernel_zero_point = 255 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1063
// Channel counts 32, 80, 128, 176, 224 with input_offset = 304
// (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1075
// For every zero_index 0..8, runs channel counts 32, 80, 128, 176, 224 with
// input_offset = 304 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
1090 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1091
1092
1093 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr value of 16 (kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(9).channels(16);
  tester.Test(
    xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
    xnn_init_qu8_conv_minmax_fp32_neonv8_params,
    xnn_qu8_requantize_fp32);
}
1102
// Channel counts 32, 80, 128, 176, 224, all multiples of 16 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1113
// Channel counts 32, 80, 128, 176, 224 with qmin = 128 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1125
// Channel counts 32, 80, 128, 176, 224 with qmax = 128 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1137
// Every channel count from 1 through 15, i.e. below cr = 16 (kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 1;
  while (c < 16) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1148
// Every channel count from 17 through 31, i.e. above cr = 16 (kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1159
// Every channel count from 17 through 31 with qmin = 128 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1171
// Every channel count from 17 through 31 with qmax = 128 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1183
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1195
// For channel counts 1, 16, 31, 46, 61, 76, runs width = 3 with every step
// value from 2 through 9 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
1210
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width = 5 and an
// output_stride of 83, which is larger than every channel count in the sweep
// (cr = 16, kr = 9).
// The loop variable is passed to channels() so that each iteration tests a
// distinct channel count rather than repeating the 16-channel configuration.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
1223
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3 and qmin = 128
// (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1236
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3 and qmax = 128
// (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1249
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3, input_zero_point = 255
// and kernel_zero_point = 0 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1263
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3, input_zero_point = 0
// and kernel_zero_point = 255 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1277
// Channel counts 32, 80, 128, 176, 224 with input_offset = 304
// (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1289
// For every zero_index 0..8, runs channel counts 32, 80, 128, 176, 224 with
// input_offset = 304 (cr = 16, kr = 9).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
1304 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1305
1306
1307 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr value of 16 (kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
    xnn_init_qu8_conv_minmax_fp32_neon_params,
    xnn_qu8_requantize_fp32);
}
1316
// Channel counts 32, 80, 128, 176, 224, all multiples of 16 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1327
// Channel counts 32, 80, 128, 176, 224 with qmin = 128 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1339
// Channel counts 32, 80, 128, 176, 224 with qmax = 128 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1351
// Every channel count from 1 through 15, i.e. below cr = 16 (kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 1;
  while (c < 16) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1362
// Every channel count from 17 through 31, i.e. above cr = 16 (kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1373
// Every channel count from 17 through 31 with qmin = 128 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1385
// Every channel count from 17 through 31 with qmax = 128 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1397
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1409
// For channel counts 1, 16, 31, 46, 61, 76, runs width = 3 with every step
// value from 2 through 25 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
1424
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width = 5 and an
// output_stride of 83, which is larger than every channel count in the sweep
// (cr = 16, kr = 25).
// The loop variable is passed to channels() so that each iteration tests a
// distinct channel count rather than repeating the 16-channel configuration.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
1437
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3 and qmin = 128
// (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1450
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3 and qmax = 128
// (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1463
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3, input_zero_point = 255
// and kernel_zero_point = 0 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1477
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3, input_zero_point = 0
// and kernel_zero_point = 255 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1491
// Channel counts 32, 80, 128, 176, 224 with input_offset = 304
// (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1503
// For every zero_index 0..24, runs channel counts 32, 80, 128, 176, 224 with
// input_offset = 304 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
1518 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1519
1520
1521 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr value of 16 (kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(
    xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
    xnn_init_qu8_conv_minmax_fp32_neonv8_params,
    xnn_qu8_requantize_fp32);
}
1530
// Channel counts 32, 80, 128, 176, 224, all multiples of 16 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1541
// Channel counts 32, 80, 128, 176, 224 with qmin = 128 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1553
// Channel counts 32, 80, 128, 176, 224 with qmax = 128 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 32;
  while (c < 256) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 48;
  }
}
1565
// Every channel count from 1 through 15, i.e. below cr = 16 (kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 1;
  while (c < 16) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1576
// Every channel count from 17 through 31, i.e. above cr = 16 (kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1587
// Every channel count from 17 through 31 with qmin = 128 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1599
// Every channel count from 17 through 31 with qmax = 128 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  uint32_t c = 17;
  while (c < 32) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c++;
  }
}
1611
// Channel counts 1, 16, 31, 46, 61, 76 with width = 3 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  size_t c = 1;
  while (c <= 80) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
    c += 15;
  }
}
1623
// For channel counts 1, 16, 31, 46, 61, 76, runs width = 3 with every step
// value from 2 through 25 (cr = 16, kr = 25).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
1638
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width = 5 and an
// output_stride of 83, which is larger than every channel count in the sweep
// (cr = 16, kr = 25).
// The loop variable is passed to channels() so that each iteration tests a
// distinct channel count rather than repeating the 16-channel configuration.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
1651
// Width-3 runs for channels = 1, 16, 31, 46, 61, 76, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1664
// Width-3 runs for channels = 1, 16, 31, 46, 61, 76, with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1677
// Width-3 runs for channels = 1, 16, 31, 46, 61, 76, with
// input_zero_point = 255 and kernel_zero_point = 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1691
// Width-3 runs for channels = 1, 16, 31, 46, 61, 76, with
// input_zero_point = 0 and kernel_zero_point = 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
1705
// Channels = 32, 80, 128, 176, 224, each run with input_offset = 304.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1717
// For each zero_index 0 through 24, runs channels = 32, 80, 128, 176, 224
// with input_offset = 304.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c)
          .input_offset(304).zero_index(zero_idx).Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__neonv8_mul16,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1732 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1733
1734
1735 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(24).kr(9).channels(24).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
}
1744
// Channels = 48, 120, 192, 264, 336; each is a multiple of the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1755
// Channels = 48, 120, 192, 264, 336 (multiples of 24), with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1767
// Channels = 48, 120, 192, 264, 336 (multiples of 24), with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1779
// Every channel count from 1 through 23, i.e. below the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1790
// Every channel count from 25 through 47, i.e. above the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1801
// Every channel count from 25 through 47, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1813
// Every channel count from 25 through 47, with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1825
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1837
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116; each channel count is
// repeated with step = 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).step(s).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
1852
// Width-5 runs with output_stride = 127 for channels = 1, 24, 47, 70, 93, 116.
// The swept value is what gets passed to .channels(): with a constant channel
// count the loop would execute the same configuration six times and never use
// its loop variable. Every swept count is below the output stride of 127.
// NOTE(review): this file is marked as auto-generated; the generator template
// should receive the same change so it is not lost on regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
1865
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1878
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1891
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with
// input_zero_point = 255 and kernel_zero_point = 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
1905
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with
// input_zero_point = 0 and kernel_zero_point = 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
1919
// Channels = 48, 120, 192, 264, 336, each run with input_offset = 464.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
1931
// For each zero_index 0 through 8, runs channels = 48, 120, 192, 264, 336
// with input_offset = 464.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(c)
          .input_offset(464).zero_index(zero_idx).Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neon_mul16,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
1946 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1947
1948
1949 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(24).kr(9).channels(24).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
1958
// Channels = 48, 120, 192, 264, 336; each is a multiple of the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1969
// Channels = 48, 120, 192, 264, 336 (multiples of 24), with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1981
// Channels = 48, 120, 192, 264, 336 (multiples of 24), with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
1993
// Every channel count from 1 through 23, i.e. below the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2004
// Every channel count from 25 through 47, i.e. above the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2015
// Every channel count from 25 through 47, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2027
// Every channel count from 25 through 47, with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2039
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2051
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116; each channel count is
// repeated with step = 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).step(s).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
          xnn_init_qu8_conv_minmax_fp32_neonv8_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2066
// Width-5 runs with output_stride = 127 for channels = 1, 24, 47, 70, 93, 116.
// The swept value is what gets passed to .channels(): with a constant channel
// count the loop would execute the same configuration six times and never use
// its loop variable. Every swept count is below the output stride of 127.
// NOTE(review): this file is marked as auto-generated; the generator template
// should receive the same change so it is not lost on regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2079
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2092
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2105
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with
// input_zero_point = 255 and kernel_zero_point = 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2119
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with
// input_zero_point = 0 and kernel_zero_point = 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
            xnn_init_qu8_conv_minmax_fp32_neonv8_params,
            xnn_qu8_requantize_fp32);
  }
}
2133
// Channels = 48, 120, 192, 264, 336, each run with input_offset = 464.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2145
// For each zero_index 0 through 8, runs channels = 48, 120, 192, 264, 336
// with input_offset = 464.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(c)
          .input_offset(464).zero_index(zero_idx).Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__neonv8_mul16,
              xnn_init_qu8_conv_minmax_fp32_neonv8_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2160 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2161
2162
2163 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(24).kr(25).channels(24).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
}
2172
// Channels = 48, 120, 192, 264, 336; each is a multiple of the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2183
// Channels = 48, 120, 192, 264, 336 (multiples of 24), with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2195
// Channels = 48, 120, 192, 264, 336 (multiples of 24), with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2207
// Every channel count from 1 through 23, i.e. below the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2218
// Every channel count from 25 through 47, i.e. above the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2229
// Every channel count from 25 through 47, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2241
// Every channel count from 25 through 47, with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2253
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2265
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116; each channel count is
// repeated with step = 2 through 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).step(s).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
          xnn_init_qu8_conv_minmax_fp32_neon_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
2280
// Width-5 runs with output_stride = 127 for channels = 1, 24, 47, 70, 93, 116.
// The swept value is what gets passed to .channels(): with a constant channel
// count the loop would execute the same configuration six times and never use
// its loop variable. Every swept count is below the output stride of 127.
// NOTE(review): this file is marked as auto-generated; the generator template
// should receive the same change so it is not lost on regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2293
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2306
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2319
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with
// input_zero_point = 255 and kernel_zero_point = 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2333
// Width-3 runs for channels = 1, 24, 47, 70, 93, 116, with
// input_zero_point = 0 and kernel_zero_point = 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255).Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
            xnn_init_qu8_conv_minmax_fp32_neon_params,
            xnn_qu8_requantize_fp32);
  }
}
2347
// Channels = 48, 120, 192, 264, 336, each run with input_offset = 464.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
  }
}
2359
// For each zero_index 0 through 24, runs channels = 48, 120, 192, 264, 336
// with input_offset = 464.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c)
          .input_offset(464).zero_index(zero_idx).Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neon_mul16,
              xnn_init_qu8_conv_minmax_fp32_neon_params,
              xnn_qu8_requantize_fp32);
    }
  }
}
2374 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2375
2376
2377 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  DWConvMicrokernelTester().cr(24).kr(25).channels(24).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
}
2386
// Channels = 48, 120, 192, 264, 336; each is a multiple of the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2397
// Channels = 48, 120, 192, 264, 336 (multiples of 24), with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2409
// Channels = 48, 120, 192, 264, 336 (multiples of 24), with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2421
// Every channel count from 1 through 23, i.e. below the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2432
// Every channel count from 25 through 47, i.e. above the cr setting (24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2443
// Every channel count from 25 through 47, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
  }
}
2455
// Channel counts 25 through 47 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 25; num_channels < 48; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2467
// Width-3 runs over channel counts 1, 24, 47, ... (step 23, up to 120).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2479
// Width-3 runs over channel counts 1, 24, 47, ... (step 23, up to 120),
// each combined with every step value from 2 through 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
2494
// Width-5 runs with output_stride 127 over channel counts 1, 24, 47, ...
// (step 23, up to 120).
//
// The loop used to pass a fixed .channels(24), so the loop variable was never
// read and every iteration repeated the same configuration. The swept value is
// now forwarded, as in the sibling multipixel tests. The largest swept count
// (116) stays below the output stride of 127.
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; the generator template needs the same change
// or this fix is lost on regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2507
// Width-3 runs over channel counts 1, 24, 47, ... (step 23, up to 120) with
// qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2520
// Width-3 runs over channel counts 1, 24, 47, ... (step 23, up to 120) with
// qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2533
// Width-3 runs over channel counts 1, 24, 47, ... (step 23, up to 120) with
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2547
// Width-3 runs over channel counts 1, 24, 47, ... (step 23, up to 120) with
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2561
// Channel counts 48 up to, but excluding, 384 in steps of 72, with
// input_offset set to 464.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25).channels(num_channels).input_offset(464);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2573
// For each zero_index 0 through 24, sweeps channel counts 48 up to, but
// excluding, 384 in steps of 72, with input_offset set to 464.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(25).channels(num_channels);
      tester.input_offset(464).zero_index(zero_idx);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
2588 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2589
2590
2591 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (32).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(32).kr(9).channels(32);
  tester.Test(
    xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
    xnn_init_qu8_conv_minmax_fp32_neon_params,
    xnn_qu8_requantize_fp32);
}
2600
// Channel counts that are multiples of 32 (64 up to, but excluding, 512 in
// steps of 96).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2611
// Channel counts that are multiples of 32 (64 up to, but excluding, 512 in
// steps of 96), with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2623
// Channel counts that are multiples of 32 (64 up to, but excluding, 512 in
// steps of 96), with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2635
// Every channel count below cr: 1 through 31.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2646
// Every channel count strictly between cr and 2*cr: 33 through 63.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2657
// Channel counts 33 through 63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2669
// Channel counts 33 through 63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2681
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2693
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160),
// each combined with every step value from 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
2708
// Width-5 runs with output_stride 163 over channel counts 1, 32, 63, ...
// (step 31, up to 160).
//
// The loop used to pass a fixed .channels(32), so the loop variable was never
// read and every iteration repeated the same configuration. The swept value is
// now forwarded, as in the sibling multipixel tests. The largest swept count
// (156) stays below the output stride of 163.
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; the generator template needs the same change
// or this fix is lost on regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
2721
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2734
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2747
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2761
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2775
// Channel counts 64 up to, but excluding, 512 in steps of 96, with
// input_offset set to 592.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).input_offset(592);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
2787
// For each zero_index 0 through 8, sweeps channel counts 64 up to, but
// excluding, 512 in steps of 96, with input_offset set to 592.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(9).channels(num_channels);
      tester.input_offset(592).zero_index(zero_idx);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
2802 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2803
2804
2805 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (32).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester();
  tester.cr(32).kr(9).channels(32);
  tester.Test(
    xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
    xnn_init_qu8_conv_minmax_fp32_neonv8_params,
    xnn_qu8_requantize_fp32);
}
2814
// Channel counts that are multiples of 32 (64 up to, but excluding, 512 in
// steps of 96).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2825
// Channel counts that are multiples of 32 (64 up to, but excluding, 512 in
// steps of 96), with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2837
// Channel counts that are multiples of 32 (64 up to, but excluding, 512 in
// steps of 96), with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2849
// Every channel count below cr: 1 through 31.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2860
// Every channel count strictly between cr and 2*cr: 33 through 63.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2871
// Channel counts 33 through 63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2883
// Channel counts 33 through 63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2895
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2907
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160),
// each combined with every step value from 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(9).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
2922
// Width-5 runs with output_stride 163 over channel counts 1, 32, 63, ...
// (step 31, up to 160).
//
// The loop used to pass a fixed .channels(32), so the loop variable was never
// read and every iteration repeated the same configuration. The swept value is
// now forwarded, as in the sibling multipixel tests. The largest swept count
// (156) stays below the output stride of 163.
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; the generator template needs the same change
// or this fix is lost on regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
2935
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2948
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2961
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2975
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
2989
// Channel counts 64 up to, but excluding, 512 in steps of 96, with
// input_offset set to 592.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(9).channels(num_channels).input_offset(592);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
3001
// For each zero_index 0 through 8, sweeps channel counts 64 up to, but
// excluding, 512 in steps of 96, with input_offset set to 592.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(9).channels(num_channels);
      tester.input_offset(592).zero_index(zero_idx);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__neonv8_mul16,
        xnn_init_qu8_conv_minmax_fp32_neonv8_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
3016 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3017
3018
3019 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (32).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(32).kr(25).channels(32);
  tester.Test(
    xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
    xnn_init_qu8_conv_minmax_fp32_neon_params,
    xnn_qu8_requantize_fp32);
}
3028
// Channel counts that are multiples of 32 (64 up to, but excluding, 512 in
// steps of 96).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3039
// Channel counts that are multiples of 32 (64 up to, but excluding, 512 in
// steps of 96), with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3051
// Channel counts that are multiples of 32 (64 up to, but excluding, 512 in
// steps of 96), with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3063
// Every channel count below cr: 1 through 31.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3074
// Every channel count strictly between cr and 2*cr: 33 through 63.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3085
// Channel counts 33 through 63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3097
// Channel counts 33 through 63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3109
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3121
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160),
// each combined with every step value from 2 through 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(25).channels(num_channels).width(3).step(pixel_step);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
3136
// Width-5 runs with output_stride 163 over channel counts 1, 32, 63, ...
// (step 31, up to 160).
//
// The loop used to pass a fixed .channels(32), so the loop variable was never
// read and every iteration repeated the same configuration. The swept value is
// now forwarded, as in the sibling multipixel tests. The largest swept count
// (156) stays below the output stride of 163.
// NOTE: the file header marks this file as generated by
// tools/generate-dwconv-test.py; the generator template needs the same change
// or this fix is lost on regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
  }
}
3149
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3162
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3175
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3189
// Width-3 runs over channel counts 1, 32, 63, ... (step 31, up to 160) with
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3203
// Channel counts 64 up to, but excluding, 512 in steps of 96, with
// input_offset set to 592.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels).input_offset(592);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
      xnn_init_qu8_conv_minmax_fp32_neon_params,
      xnn_qu8_requantize_fp32);
  }
}
3215
// For each zero_index 0 through 24, sweeps channel counts 64 up to, but
// excluding, 512 in steps of 96, with input_offset set to 592.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(32).kr(25).channels(num_channels);
      tester.input_offset(592).zero_index(zero_idx);
      tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neon_mul16,
        xnn_init_qu8_conv_minmax_fp32_neon_params,
        xnn_qu8_requantize_fp32);
    }
  }
}
3230 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3231
3232
3233 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (32).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = DWConvMicrokernelTester();
  tester.cr(32).kr(25).channels(32);
  tester.Test(
    xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
    xnn_init_qu8_conv_minmax_fp32_neonv8_params,
    xnn_qu8_requantize_fp32);
}
3242
// Channel counts that are multiples of 32 (64 up to, but excluding, 512 in
// steps of 96).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16,
      xnn_init_qu8_conv_minmax_fp32_neonv8_params,
      xnn_qu8_requantize_fp32);
  }
}
3253
// up32x25 NEONv8 MUL16: channel counts 64, 160, ..., 448 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3265
// up32x25 NEONv8 MUL16: channel counts 64, 160, ..., 448 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3277
// up32x25 NEONv8 MUL16: every channel count below cr, i.e. 1 through 31.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3288
// up32x25 NEONv8 MUL16: channel counts 33 through 63 (above cr, below 2*cr).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3299
// up32x25 NEONv8 MUL16: channel counts 33 through 63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3311
// up32x25 NEONv8 MUL16: channel counts 33 through 63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3323
// up32x25 NEONv8 MUL16: width 3, channel counts 1, 32, 63, ..., 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3335
// up32x25 NEONv8 MUL16: width 3, channel counts 1, 32, ..., 156, each combined
// with every step value from 2 through 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3350
// up32x25 NEONv8 MUL16: width 5 with output_stride 163, channel counts
// 1, 32, 63, ..., 156. The loop variable is forwarded to channels() so each
// iteration exercises a distinct channel count; output_stride(163) is larger
// than every channel count visited.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be mirrored in the generator template.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3363
// up32x25 NEONv8 MUL16: width 3, channel counts 1, 32, ..., 156, qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3376
// up32x25 NEONv8 MUL16: width 3, channel counts 1, 32, ..., 156, qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3389
// up32x25 NEONv8 MUL16: width 3, channel counts 1, 32, ..., 156, with
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3403
// up32x25 NEONv8 MUL16: width 3, channel counts 1, 32, ..., 156, with
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3417
// up32x25 NEONv8 MUL16: channel counts 64, 160, ..., 448 with input_offset set to 592.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
  }
}
3429
// up32x25 NEONv8 MUL16: for every zero_index in [0, 25), sweeps channel counts
// 64, 160, ..., 448 with input_offset set to 592.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__NEONV8_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).zero_index(zero_idx)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__neonv8_mul16, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
    }
  }
}
3444 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3445
3446
3447 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// up8x9 SSE2 MUL16: single run with channels == cr == 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3456
// up8x9 SSE2 MUL16: channel counts 16, 40, ..., 112 (multiples of 8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3467
// up8x9 SSE2 MUL16: channel counts 16, 40, ..., 112 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3479
// up8x9 SSE2 MUL16: channel counts 16, 40, ..., 112 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3491
// up8x9 SSE2 MUL16: every channel count below cr, i.e. 1 through 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3502
// up8x9 SSE2 MUL16: channel counts 9 through 15 (above cr, below 2*cr).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3513
// up8x9 SSE2 MUL16: channel counts 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3525
// up8x9 SSE2 MUL16: channel counts 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3537
// up8x9 SSE2 MUL16: width 3, channel counts 1, 8, 15, ..., 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3549
// up8x9 SSE2 MUL16: width 3, channel counts 1, 8, ..., 36, each combined with
// every step value from 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3564
// up8x9 SSE2 MUL16: width 5 with output_stride 43, channel counts
// 1, 8, 15, ..., 36. The loop variable is forwarded to channels() so each
// iteration exercises a distinct channel count; output_stride(43) is larger
// than every channel count visited.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be mirrored in the generator template.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3577
// up8x9 SSE2 MUL16: width 3, channel counts 1, 8, ..., 36, qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3590
// up8x9 SSE2 MUL16: width 3, channel counts 1, 8, ..., 36, qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3603
// up8x9 SSE2 MUL16: width 3, channel counts 1, 8, ..., 36, with
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3617
// up8x9 SSE2 MUL16: width 3, channel counts 1, 8, ..., 36, with
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3631
// up8x9 SSE2 MUL16: channel counts 16, 40, ..., 112 with input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3643
// up8x9 SSE2 MUL16: for every zero_index in [0, 9), sweeps channel counts
// 16, 40, ..., 112 with input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3658 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3659
3660
3661 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// up8x9 SSE4.1 MUL16: single run with channels == cr == 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3670
// up8x9 SSE4.1 MUL16: channel counts 16, 40, ..., 112 (multiples of 8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3681
// up8x9 SSE4.1 MUL16: channel counts 16, 40, ..., 112 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3693
// up8x9 SSE4.1 MUL16: channel counts 16, 40, ..., 112 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3705
// up8x9 SSE4.1 MUL16: every channel count below cr, i.e. 1 through 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3716
// up8x9 SSE4.1 MUL16: channel counts 9 through 15 (above cr, below 2*cr).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3727
// up8x9 SSE4.1 MUL16: channel counts 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3739
// up8x9 SSE4.1 MUL16: channel counts 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3751
// up8x9 SSE4.1 MUL16: width 3, channel counts 1, 8, 15, ..., 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3763
// up8x9 SSE4.1 MUL16: width 3, channel counts 1, 8, ..., 36, each combined with
// every step value from 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3778
// up8x9 SSE4.1 MUL16: width 5 with output_stride 43, channel counts
// 1, 8, 15, ..., 36. The loop variable is forwarded to channels() so each
// iteration exercises a distinct channel count; output_stride(43) is larger
// than every channel count visited.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be mirrored in the generator template.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3791
// up8x9 SSE4.1 MUL16: width 3, channel counts 1, 8, ..., 36, qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3804
// up8x9 SSE4.1 MUL16: width 3, channel counts 1, 8, ..., 36, qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3817
// up8x9 SSE4.1 MUL16: width 3, channel counts 1, 8, ..., 36, with
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3831
// up8x9 SSE4.1 MUL16: width 3, channel counts 1, 8, ..., 36, with
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3845
// up8x9 SSE4.1 MUL16: channel counts 16, 40, ..., 112 with input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3857
// up8x9 SSE4.1 MUL16: for every zero_index in [0, 9), sweeps channel counts
// 16, 40, ..., 112 with input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3872 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3873
3874
3875 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// up8x9 SSE4.1 MUL32: single run with channels == cr == 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
3884
// up8x9 SSE4.1 MUL32: channel counts 16, 40, ..., 112 (multiples of 8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3895
// up8x9 SSE4.1 MUL32: channel counts 16, 40, ..., 112 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3907
// up8x9 SSE4.1 MUL32: channel counts 16, 40, ..., 112 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3919
// up8x9 SSE4.1 MUL32: every channel count below cr, i.e. 1 through 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3930
// up8x9 SSE4.1 MUL32: channel counts 9 through 15 (above cr, below 2*cr).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3941
// up8x9 SSE4.1 MUL32: channel counts 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3953
// up8x9 SSE4.1 MUL32: channel counts 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3965
// up8x9 SSE4.1 MUL32: width 3, channel counts 1, 8, 15, ..., 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
3977
// up8x9 SSE4.1 MUL32: width 3, channel counts 1, 8, ..., 36, each combined with
// every step value from 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
3992
// up8x9 SSE4.1 MUL32: width 5 with output_stride 43, channel counts
// 1, 8, 15, ..., 36. The loop variable is forwarded to channels() so each
// iteration exercises a distinct channel count; output_stride(43) is larger
// than every channel count visited.
// NOTE: this file is generated by tools/generate-dwconv-test.py; the same
// change should be mirrored in the generator template.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4005
// up8x9 SSE4.1 MUL32: width 3, channel counts 1, 8, ..., 36, qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4018
// up8x9 SSE4.1 MUL32: width 3, channel counts 1, 8, ..., 36, qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4031
// up8x9 SSE4.1 MUL32: width 3, channel counts 1, 8, ..., 36, with
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4045
// up8x9 SSE4.1 MUL32: width 3, channel counts 1, 8, ..., 36, with
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4059
// Runs with input_offset = 176 over channel counts 16, 40, 64, 88, 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(9)
        .channels(c)
        .input_offset(176)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4071
// For every zero_index in [0, 9), runs with input_offset = 176 over
// channel counts 16, 40, 64, 88, 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(9)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
4086 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4087
4088
4089 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester()
      .cr(8).kr(25)
      .channels(8)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
4098
// Channel counts 16, 40, 64, 88, 112: each a multiple of cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4109
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4121
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4133
// Channel counts 1 through 7, all below cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4144
// Channel counts 9 through 15, all strictly between cr (8) and 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4155
// Channel counts 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4167
// Channel counts 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4179
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4191
// Width-3 runs for every step in [2, 25], over channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
4206
// Width-5 runs with output_stride = 43, over channel counts 1, 8, 15, 22, 29, 36.
// channels() takes the loop variable so that each iteration exercises a
// distinct channel count; the largest count (36) stays below the output
// stride (43).
// NOTE(review): assumes the tester accepts any channel count that does not
// exceed output_stride -- confirm against DWConvMicrokernelTester.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4219
// Width-3 runs with qmin set to 128, over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4232
// Width-3 runs with qmax set to 128, over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4245
// Width-3 runs with input_zero_point = 255 and kernel_zero_point = 0,
// over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .input_zero_point(255)
        .kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4259
// Width-3 runs with input_zero_point = 0 and kernel_zero_point = 255,
// over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .input_zero_point(0)
        .kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4273
// Runs with input_offset = 176 over channel counts 16, 40, 64, 88, 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .input_offset(176)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4285
// For every zero_index in [0, 25), runs with input_offset = 176 over
// channel counts 16, 40, 64, 88, 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse2_mul16,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
4300 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4301
4302
4303 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester()
      .cr(8).kr(25)
      .channels(8)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
4312
// Channel counts 16, 40, 64, 88, 112: each a multiple of cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4323
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4335
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4347
// Channel counts 1 through 7, all below cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4358
// Channel counts 9 through 15, all strictly between cr (8) and 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4369
// Channel counts 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4381
// Channel counts 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4393
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4405
// Width-3 runs for every step in [2, 25], over channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
4420
// Width-5 runs with output_stride = 43, over channel counts 1, 8, 15, 22, 29, 36.
// channels() takes the loop variable so that each iteration exercises a
// distinct channel count; the largest count (36) stays below the output
// stride (43).
// NOTE(review): assumes the tester accepts any channel count that does not
// exceed output_stride -- confirm against DWConvMicrokernelTester.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4433
// Width-3 runs with qmin set to 128, over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4446
// Width-3 runs with qmax set to 128, over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4459
// Width-3 runs with input_zero_point = 255 and kernel_zero_point = 0,
// over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .input_zero_point(255)
        .kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4473
// Width-3 runs with input_zero_point = 0 and kernel_zero_point = 255,
// over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .input_zero_point(0)
        .kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4487
// Runs with input_offset = 176 over channel counts 16, 40, 64, 88, 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .input_offset(176)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4499
// For every zero_index in [0, 25), runs with input_offset = 176 over
// channel counts 16, 40, 64, 88, 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul16,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
4514 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4515
4516
4517 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester()
      .cr(8).kr(25)
      .channels(8)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
4526
// Channel counts 16, 40, 64, 88, 112: each a multiple of cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4537
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4549
// Channel counts 16, 40, 64, 88, 112 (multiples of cr = 8) with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4561
// Channel counts 1 through 7, all below cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4572
// Channel counts 9 through 15, all strictly between cr (8) and 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4583
// Channel counts 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4595
// Channel counts 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4607
// Width-3 runs over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4619
// Width-3 runs for every step in [2, 25], over channel counts
// 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
4634
// Width-5 runs with output_stride = 43, over channel counts 1, 8, 15, 22, 29, 36.
// channels() takes the loop variable so that each iteration exercises a
// distinct channel count; the largest count (36) stays below the output
// stride (43).
// NOTE(review): assumes the tester accepts any channel count that does not
// exceed output_stride -- confirm against DWConvMicrokernelTester.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4647
// Width-3 runs with qmin set to 128, over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4660
// Width-3 runs with qmax set to 128, over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4673
// Width-3 runs with input_zero_point = 255 and kernel_zero_point = 0,
// over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .input_zero_point(255)
        .kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4687
// Width-3 runs with input_zero_point = 0 and kernel_zero_point = 255,
// over channel counts 1, 8, 15, 22, 29, 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .width(3)
        .input_zero_point(0)
        .kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4701
// Runs with input_offset = 176 over channel counts 16, 40, 64, 88, 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester()
        .cr(8).kr(25)
        .channels(c)
        .input_offset(176)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4713
// For every zero_index in [0, 25), runs with input_offset = 176 over
// channel counts 16, 40, 64, 88, 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25)
          .channels(c)
          .input_offset(176)
          .zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__sse41_mul32,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
4728 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4729
4730
4731 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with the channel count equal to cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester()
      .cr(16).kr(9)
      .channels(16)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
            xnn_init_qu8_conv_minmax_fp32_sse2_params,
            xnn_qu8_requantize_fp32);
}
4740
// Channel counts 32, 80, 128, 176, 224: each a multiple of cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4751
// Channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9)
        .channels(c)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4763
// Channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9)
        .channels(c)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4775
// Channel counts 1 through 15, all below cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4786
// Channel counts 17 through 31, all strictly between cr (16) and 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9)
        .channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4797
// Channel counts 17 through 31 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9)
        .channels(c)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4809
// Channel counts 17 through 31 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9)
        .channels(c)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4821
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9)
        .channels(c)
        .width(3)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4833
// Width-3 runs for every step in [2, 9], over channel counts
// 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester()
          .cr(16).kr(9)
          .channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
4848
// Width-5 runs with output_stride = 83, over channel counts 1, 16, 31, 46, 61, 76.
// channels() takes the loop variable so that each iteration exercises a
// distinct channel count; the largest count (76) stays below the output
// stride (83).
// NOTE(review): assumes the tester accepts any channel count that does not
// exceed output_stride -- confirm against DWConvMicrokernelTester.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4861
// Width-3 runs with qmin set to 128, over channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9)
        .channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
4874
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4887
// Width-3 runs over channel counts 1, 16, ..., 76 with the input zero point
// at 255 and the kernel zero point at 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).input_zero_point(255).kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4901
// Width-3 runs over channel counts 1, 16, ..., 76 with the input zero point
// at 0 and the kernel zero point at 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).input_zero_point(0).kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4915
// Channel counts 32, 80, 128, 176, 224 with input_offset(304) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).input_offset(304)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4927
// For every zero_index in [0, 9) sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
4942 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4943
4944
4945 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: channel count equal to cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(9).channels(16)
    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
4954
// Channel counts 32, 80, 128, 176, 224 -- each a multiple of cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4965
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4977
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
4989
// Every channel count below cr: 1 through 15.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5000
// Every channel count strictly between 16 and 32: 17 through 31.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5011
// Channel counts 17 through 31 with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5023
// Channel counts 17 through 31 with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5035
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5047
// Width-3 runs; for each channel count (1, 16, ..., 76) every step value
// from 2 through kr (9) is tried.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
5062
// Width-5 runs with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76
// (all below the stride of 83).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) made all six iterations repeat one identical configuration.
// NOTE: this file is generated (tools/generate-dwconv-test.py); mirror the
// change in the generator template or it will be lost on regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5075
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5088
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5101
// Width-3 runs over channel counts 1, 16, ..., 76 with the input zero point
// at 255 and the kernel zero point at 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).input_zero_point(255).kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5115
// Width-3 runs over channel counts 1, 16, ..., 76 with the input zero point
// at 0 and the kernel zero point at 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).input_zero_point(0).kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5129
// Channel counts 32, 80, 128, 176, 224 with input_offset(304) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).input_offset(304)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5141
// For every zero_index in [0, 9) sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
5156 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5157
5158
5159 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: channel count equal to cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(9).channels(16)
    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
5168
// Channel counts 32, 80, 128, 176, 224 -- each a multiple of cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5179
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5191
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5203
// Every channel count below cr: 1 through 15.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5214
// Every channel count strictly between 16 and 32: 17 through 31.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5225
// Channel counts 17 through 31 with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5237
// Channel counts 17 through 31 with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5249
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5261
// Width-3 runs; for each channel count (1, 16, ..., 76) every step value
// from 2 through kr (9) is tried.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
5276
// Width-5 runs with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76
// (all below the stride of 83).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) made all six iterations repeat one identical configuration.
// NOTE: this file is generated (tools/generate-dwconv-test.py); mirror the
// change in the generator template or it will be lost on regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5289
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5302
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5315
// Width-3 runs over channel counts 1, 16, ..., 76 with the input zero point
// at 255 and the kernel zero point at 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).input_zero_point(255).kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5329
// Width-3 runs over channel counts 1, 16, ..., 76 with the input zero point
// at 0 and the kernel zero point at 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).input_zero_point(0).kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5343
// Channel counts 32, 80, 128, 176, 224 with input_offset(304) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).input_offset(304)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5355
// For every zero_index in [0, 9) sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
5370 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5371
5372
5373 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: channel count equal to cr (16), with kr = 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16)
    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
5382
// Channel counts 32, 80, 128, 176, 224 -- each a multiple of cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5393
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5405
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5417
// Every channel count below cr: 1 through 15.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5428
// Every channel count strictly between 16 and 32: 17 through 31.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5439
// Channel counts 17 through 31 with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5451
// Channel counts 17 through 31 with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5463
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5475
// Width-3 runs; for each channel count (1, 16, ..., 76) every step value
// from 2 through kr (25) is tried.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
5490
// Width-5 runs with output_stride(83) over channel counts 1, 16, 31, 46, 61, 76
// (all below the stride of 83).
// The loop variable is forwarded to channels(); the previous hard-coded
// channels(16) made all six iterations repeat one identical configuration.
// NOTE: this file is generated (tools/generate-dwconv-test.py); mirror the
// change in the generator template or it will be lost on regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5503
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5516
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5529
// Width-3 runs over channel counts 1, 16, ..., 76 with the input zero point
// at 255 and the kernel zero point at 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).input_zero_point(255).kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5543
// Width-3 runs over channel counts 1, 16, ..., 76 with the input zero point
// at 0 and the kernel zero point at 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).input_zero_point(0).kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5557
// Channel counts 32, 80, 128, 176, 224 with input_offset(304) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5569
// For every zero_index in [0, 25) sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE2_MUL16, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse2_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
5584 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5585
5586
5587 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single case: channel count equal to cr (16), with kr = 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16)
    .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
5596
// Channel counts 32, 80, 128, 176, 224 -- each a multiple of cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5607
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5619
// Channel counts 32, 80, 128, 176, 224 (multiples of 16) with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5631
// Every channel count below cr: 1 through 15.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5642
// Every channel count strictly between 16 and 32: 17 through 31.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5653
// Channel counts 17 through 31 with qmin(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5665
// Channel counts 17 through 31 with qmax(128) set.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
5677
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  // Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5689
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  // For channel counts 1, 16, ..., 76: width-3 runs with every step in [2, 25].
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5704
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  // Width-5 runs with an output stride of 83 for channel counts 1, 16, ..., 76.
  // channels() follows the loop variable so that each iteration covers a
  // different channel count (a fixed channels(16) would repeat one
  // configuration six times); every swept value stays below the stride of 83.
  // NOTE: this file is generated; mirror this change in
  // tools/generate-dwconv-test.py so it survives regeneration.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(5).output_stride(83);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5717
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Width-3 runs for channel counts 1, 16, ..., 76 with qmin set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5730
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Width-3 runs for channel counts 1, 16, ..., 76 with qmax set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5743
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  // Width-3 runs with input zero point 255 and kernel zero point 0.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5757
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  // Width-3 runs with input zero point 0 and kernel zero point 255.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5771
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176 and 224 with an input offset of 304.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5783
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL16, zero) {
  TEST_REQUIRES_X86_SSE41;
  // For each zero_index in [0, 25): channel counts 32, 80, ..., 224 with an
  // input offset of 304.
  for (uint32_t z = 0; z < 25; ++z) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(z);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul16,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5798 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5799
5800
5801 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  // Single run with the channel count equal to cr (16).
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
5810
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176 and 224 -- all multiples of cr = 16.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5821
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, ..., 224 (multiples of 16) with qmin set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5833
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, ..., 224 (multiples of 16) with qmax set to 128.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5845
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Run the kernel for every channel count in [1, 15], i.e. below cr = 16.
  for (uint32_t c = 1; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5856
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  // Run the kernel for every channel count in [17, 31], i.e. just above cr = 16.
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5867
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts in [17, 31] with qmin set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5879
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts in [17, 31] with qmax set to 128.
  for (uint32_t c = 17; c < 32; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5891
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel) {
  TEST_REQUIRES_X86_SSE41;
  // Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5903
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_SSE41;
  // For channel counts 1, 16, ..., 76: width-3 runs with every step in [2, 25].
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
5918
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_SSE41;
  // Width-5 runs with an output stride of 83 for channel counts 1, 16, ..., 76.
  // channels() follows the loop variable so that each iteration covers a
  // different channel count (a fixed channels(16) would repeat one
  // configuration six times); every swept value stays below the stride of 83.
  // NOTE: this file is generated; mirror this change in
  // tools/generate-dwconv-test.py so it survives regeneration.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(5).output_stride(83);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5931
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Width-3 runs for channel counts 1, 16, ..., 76 with qmin set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5944
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Width-3 runs for channel counts 1, 16, ..., 76 with qmax set to 128.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5957
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  // Width-3 runs with input zero point 255 and kernel zero point 0.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5971
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_SSE41;
  // Width-3 runs with input zero point 0 and kernel zero point 255.
  for (size_t c = 1; c <= 80; c += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5985
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, input_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Channel counts 32, 80, 128, 176 and 224 with an input offset of 304.
  for (uint32_t c = 32; c < 256; c += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
5997
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__SSE41_MUL32, zero) {
  TEST_REQUIRES_X86_SSE41;
  // For each zero_index in [0, 25): channel counts 32, 80, ..., 224 with an
  // input offset of 304.
  for (uint32_t z = 0; z < 25; ++z) {
    for (uint32_t c = 32; c < 256; c += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(z);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__sse41_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6012 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6013
6014
6015 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single run with the channel count equal to cr (8).
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6024
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 16, 40, 64, 88 and 112 -- all multiples of cr = 8.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6035
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 16, 40, ..., 112 (multiples of 8) with qmin set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6047
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 16, 40, ..., 112 (multiples of 8) with qmax set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6059
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  // Run the kernel for every channel count in [1, 7], i.e. below cr = 8.
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6070
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // Run the kernel for every channel count in [9, 15], i.e. just above cr = 8.
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6081
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts in [9, 15] with qmin set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6093
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts in [9, 15] with qmax set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6105
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6117
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  // For channel counts 1, 8, ..., 36: width-3 runs with every step in [2, 9].
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6132
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  // Width-5 runs with an output stride of 43 for channel counts 1, 8, ..., 36.
  // channels() follows the loop variable so that each iteration covers a
  // different channel count (a fixed channels(8) would repeat one
  // configuration six times); every swept value stays below the stride of 43.
  // NOTE: this file is generated; mirror this change in
  // tools/generate-dwconv-test.py so it survives regeneration.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(5).output_stride(43);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6145
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs for channel counts 1, 8, ..., 36 with qmin set to 128.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6158
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs for channel counts 1, 8, ..., 36 with qmax set to 128.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6171
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs with input zero point 255 and kernel zero point 0.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6185
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs with input zero point 0 and kernel zero point 255.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6199
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 16, 40, 64, 88 and 112 with an input offset of 176.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6211
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  // For each zero_index in [0, 9): channel counts 16, 40, ..., 112 with an
  // input offset of 176.
  for (uint32_t z = 0; z < 9; ++z) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(z);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6226 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6227
6228
6229 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single run with the channel count equal to cr (8).
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
6238
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 16, 40, 64, 88 and 112 -- all multiples of cr = 8.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6249
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 16, 40, ..., 112 (multiples of 8) with qmin set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6261
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 16, 40, ..., 112 (multiples of 8) with qmax set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6273
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  // Run the kernel for every channel count in [1, 7], i.e. below cr = 8.
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6284
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // Run the kernel for every channel count in [9, 15], i.e. just above cr = 8.
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6295
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts in [9, 15] with qmin set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6307
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts in [9, 15] with qmax set to 128.
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6319
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs for channel counts 1, 8, 15, 22, 29 and 36.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6331
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  // For channel counts 1, 8, ..., 36: width-3 runs with every step in [2, 9].
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6346
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  // Width-5 runs with an output stride of 43 for channel counts 1, 8, ..., 36.
  // channels() follows the loop variable so that each iteration covers a
  // different channel count (a fixed channels(8) would repeat one
  // configuration six times); every swept value stays below the stride of 43.
  // NOTE: this file is generated; mirror this change in
  // tools/generate-dwconv-test.py so it survives regeneration.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(5).output_stride(43);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6359
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs for channel counts 1, 8, ..., 36 with qmin set to 128.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6372
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs for channel counts 1, 8, ..., 36 with qmax set to 128.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6385
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs with input zero point 255 and kernel zero point 0.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6399
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  // Width-3 runs with input zero point 0 and kernel zero point 255.
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6413
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  // Channel counts 16, 40, 64, 88 and 112 with an input offset of 176.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
6425
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  // For each zero_index in [0, 9): channel counts 16, 40, ..., 112 with an
  // input offset of 176.
  for (uint32_t z = 0; z < 9; ++z) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(z);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
6440 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6441
6442
6443 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  // Single run with the channel count equal to cr (8).
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
6452
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 16, 40, 64, 88 and 112 -- all multiples of cr = 8.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
6463
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 16, 40, ..., 112 (multiples of 8) with qmin set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
6475
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  // Channel counts 16, 40, ..., 112 (multiples of 8) with qmax set to 128.
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
6487
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Run the kernel for every channel count in [1, 7], i.e. below cr = 8.
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
6498
// Channel counts 9 through 15 (between 8 and 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
6509
// Channel counts 9 through 15 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
6521
// Channel counts 9 through 15 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
6533
// width(3) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
6545
// width(3) over channel counts 1, 8, ..., 36, each with step() from 2 to 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8).kr(9).channels(c).width(3).step(step_size)
          .Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
              xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6560
// width(5) with output_stride(43) over channel counts 1, 8, 15, 22, 29 and 36.
// Each iteration forwards the loop's channel count to channels(), so the sweep
// actually varies the channel count; output_stride(43) exceeds the largest
// count in the sweep (36).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
6573
// width(3) over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
6586
// width(3) over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
6599
// width(3) sweep with input_zero_point(255) and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(9).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
            xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
6613
// width(3) sweep with input_zero_point(0) and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(9).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
            xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
6627
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
6639
// zero_index 0 through 8, each over channel counts 16..112 (step 24) with
// input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx)
          .Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
              xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6654 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6655
6656
6657 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8) and cr(8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6666
// Channel counts 16, 40, 64, 88 and 112: multiples of 8 above 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6677
// Channel counts 16, 40, 64, 88 and 112 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6689
// Channel counts 16, 40, 64, 88 and 112 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6701
// Channel counts 1 through 7 (below 8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6712
// Channel counts 9 through 15 (between 8 and 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6723
// Channel counts 9 through 15 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6735
// Channel counts 9 through 15 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6747
// width(3) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6759
// width(3) over channel counts 1, 8, ..., 36, each with step() from 2 to 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8).kr(9).channels(c).width(3).step(step_size)
          .Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6774
// width(5) with output_stride(43) over channel counts 1, 8, 15, 22, 29 and 36.
// Each iteration forwards the loop's channel count to channels(), so the sweep
// actually varies the channel count; output_stride(43) exceeds the largest
// count in the sweep (36).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6787
// width(3) over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6800
// width(3) over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6813
// width(3) sweep with input_zero_point(255) and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(9).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6827
// width(3) sweep with input_zero_point(0) and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_XOP;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(9).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6841
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6853
// zero_index 0 through 8, each over channel counts 16..112 (step 24) with
// input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(9).channels(c).input_offset(176).zero_index(zero_idx)
          .Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6868 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6869
6870
6871 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8) and cr(8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
6880
// Channel counts 16, 40, 64, 88 and 112: multiples of 8 above 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6891
// Channel counts 16, 40, 64, 88 and 112 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6903
// Channel counts 16, 40, 64, 88 and 112 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6915
// Channel counts 1 through 7 (below 8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6926
// Channel counts 9 through 15 (between 8 and 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6937
// Channel counts 9 through 15 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6949
// Channel counts 9 through 15 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6961
// width(3) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
6973
// width(3) over channel counts 1, 8, ..., 36, each with step() from 2 to 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).width(3).step(step_size)
          .Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
6988
// width(5) with output_stride(43) over channel counts 1, 8, 15, 22, 29 and 36.
// Each iteration forwards the loop's channel count to channels(), so the sweep
// actually varies the channel count; output_stride(43) exceeds the largest
// count in the sweep (36).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7001
// width(3) over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7014
// width(3) over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7027
// width(3) sweep with input_zero_point(255) and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7041
// width(3) sweep with input_zero_point(0) and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7055
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7067
// zero_index 0 through 24, each over channel counts 16..112 (step 24) with
// input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx)
          .Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7082 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7083
7084
7085 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8) and cr(8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
      xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
}
7094
// Channel counts 16, 40, 64, 88 and 112: multiples of 8 above 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7105
// Channel counts 16, 40, 64, 88 and 112 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7117
// Channel counts 16, 40, 64, 88 and 112 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7129
// Channel counts 1 through 7 (below 8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 1; c < 8; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7140
// Channel counts 9 through 15 (between 8 and 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7151
// Channel counts 9 through 15 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7163
// Channel counts 9 through 15 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 9; c < 16; ++c) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7175
// width(3) over channel counts 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7187
// width(3) over channel counts 1, 8, ..., 36, each with step() from 2 to 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).width(3).step(step_size)
          .Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7202
// width(5) with output_stride(43) over channel counts 1, 8, 15, 22, 29 and 36.
// Each iteration forwards the loop's channel count to channels(), so the sweep
// actually varies the channel count; output_stride(43) exceeds the largest
// count in the sweep (36).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7215
// width(3) over channel counts 1, 8, ..., 36 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7228
// width(3) over channel counts 1, 8, ..., 36 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7241
// width(3) sweep with input_zero_point(255) and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7255
// width(3) sweep with input_zero_point(0) and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(
            xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
            xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7269
// Channel counts 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7281
// zero_index 0 through 24, each over channel counts 16..112 (step 24) with
// input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx)
          .Test(
              xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
    }
  }
}
7296 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7297
7298
7299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8) and cr(8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
      xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
}
7308
// Channel counts 16, 40, 64, 88 and 112: multiples of 8 above 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
7319
// Sweeps channels over 16, 40, 64, 88 and 112 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 16; channel_count < 128; channel_count += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
7331
// Sweeps channels over 16, 40, 64, 88 and 112 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 16; channel_count < 128; channel_count += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
7343
// Runs every channel count from 1 through 7, i.e. all values below cr(8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 1; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
7354
// Runs every channel count from 9 through 15 (strictly between 8 and 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 9; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
7365
// Runs every channel count from 9 through 15 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 9; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
7377
// Runs every channel count from 9 through 15 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 9; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
7389
// width(3) runs for channels 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
7401
// width(3) runs for channels 1, 8, 15, 22, 29 and 36, each combined with every
// step value from 2 through 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(channel_count)
          .width(3).step(step_size)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7416
// Multi-pixel run (width 5) with output_stride(43) while the channel count
// sweeps 1, 8, 15, 22, 29 and 36.
//
// channels() receives the loop variable so that each iteration exercises a
// different channel count; with a hard-coded channels(8) all six iterations
// would repeat one identical configuration. Every swept value stays below the
// output stride of 43.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py — the
// same change presumably belongs in the generator template, otherwise
// regeneration will revert it.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
7429
// width(3) runs for channels 1, 8, 15, 22, 29 and 36 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(channel_count)
        .width(3).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
7442
// width(3) runs for channels 1, 8, 15, 22, 29 and 36 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(channel_count)
        .width(3).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
7455
// width(3) runs for channels 1, 8, 15, 22, 29 and 36 with input_zero_point(255)
// and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(channel_count).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
7469
// width(3) runs for channels 1, 8, 15, 22, 29 and 36 with input_zero_point(0)
// and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(channel_count).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
              xnn_init_qu8_conv_minmax_fp32_avx2_params,
              xnn_qu8_requantize_fp32);
  }
}
7483
// Sweeps channels over 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t channel_count = 16; channel_count < 128; channel_count += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
7495
// For every zero_index in 0..24, sweeps channels over 16, 40, 64, 88 and 112
// with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t channel_count = 16; channel_count < 128; channel_count += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(channel_count)
          .input_offset(176).zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32,
                xnn_init_qu8_conv_minmax_fp32_avx2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7510 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7511
7512
7513 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(8), the same value that is passed to cr().
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_eq_8) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7522
// Sweeps channels over 16, 40, 64, 88 and 112 (all multiples of 8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 16; channel_count < 128; channel_count += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7533
// Sweeps channels over 16, 40, 64, 88 and 112 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 16; channel_count < 128; channel_count += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7545
// Sweeps channels over 16, 40, 64, 88 and 112 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_div_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 16; channel_count < 128; channel_count += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7557
// Runs every channel count from 1 through 7, i.e. all values below cr(8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 1; channel_count < 8; ++channel_count) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7568
// Runs every channel count from 9 through 15 (strictly between 8 and 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 9; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7579
// Runs every channel count from 9 through 15 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 9; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7591
// Runs every channel count from 9 through 15 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, c_gt_8_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 9; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7603
// width(3) runs for channels 1, 8, 15, 22, 29 and 36.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7615
// width(3) runs for channels 1, 8, 15, 22, 29 and 36, each combined with every
// step value from 2 through 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(channel_count)
          .width(3).step(step_size)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7630
// Multi-pixel run (width 5) with output_stride(43) while the channel count
// sweeps 1, 8, 15, 22, 29 and 36.
//
// channels() receives the loop variable so that each iteration exercises a
// different channel count; with a hard-coded channels(8) all six iterations
// would repeat one identical configuration. Every swept value stays below the
// output stride of 43.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py — the
// same change presumably belongs in the generator template, otherwise
// regeneration will revert it.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7643
// width(3) runs for channels 1, 8, 15, 22, 29 and 36 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(channel_count)
        .width(3).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7656
// width(3) runs for channels 1, 8, 15, 22, 29 and 36 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(channel_count)
        .width(3).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7669
// width(3) runs for channels 1, 8, 15, 22, 29 and 36 with input_zero_point(255)
// and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(channel_count).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7683
// width(3) runs for channels 1, 8, 15, 22, 29 and 36 with input_zero_point(0)
// and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channel_count = 1; channel_count <= 40; channel_count += 7) {
    DWConvMicrokernelTester()
        .cr(8).kr(25).channels(channel_count).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7697
// Sweeps channels over 16, 40, 64, 88 and 112 with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t channel_count = 16; channel_count < 128; channel_count += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(channel_count).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7709
// For every zero_index in 0..24, sweeps channels over 16, 40, 64, 88 and 112
// with input_offset(176).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t channel_count = 16; channel_count < 128; channel_count += 24) {
      DWConvMicrokernelTester()
          .cr(8).kr(25).channels(channel_count)
          .input_offset(176).zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__xop_mul32,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7724 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7725
7726
7727 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), the same value that is passed to cr().
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7736
// Sweeps channels over 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7747
// Sweeps channels over 32, 80, 128, 176 and 224 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7759
// Sweeps channels over 32, 80, 128, 176 and 224 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7771
// Runs every channel count from 1 through 15, i.e. all values below cr(16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 1; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7782
// Runs every channel count from 17 through 31 (strictly between 16 and 32).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7793
// Runs every channel count from 17 through 31 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7805
// Runs every channel count from 17 through 31 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7817
// width(3) runs for channels 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7829
// width(3) runs for channels 1, 16, 31, 46, 61 and 76, each combined with every
// step value from 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(channel_count)
          .width(3).step(step_size)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7844
// Multi-pixel run (width 5) with output_stride(83) while the channel count
// sweeps 1, 16, 31, 46, 61 and 76.
//
// channels() receives the loop variable so that each iteration exercises a
// different channel count; with a hard-coded channels(16) all six iterations
// would repeat one identical configuration. Every swept value stays below the
// output stride of 83.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py — the
// same change presumably belongs in the generator template, otherwise
// regeneration will revert it.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
7857
// width(3) runs for channels 1, 16, 31, 46, 61 and 76 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(channel_count)
        .width(3).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7870
// width(3) runs for channels 1, 16, 31, 46, 61 and 76 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(channel_count)
        .width(3).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7883
// width(3) runs for channels 1, 16, 31, 46, 61 and 76 with
// input_zero_point(255) and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(channel_count).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7897
// width(3) runs for channels 1, 16, 31, 46, 61 and 76 with
// input_zero_point(0) and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(channel_count).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
7911
// Sweeps channels over 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).input_offset(304).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7923
// For every zero_index in 0..8, sweeps channels over 32, 80, 128, 176 and 224
// with input_offset(304).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(channel_count)
          .input_offset(304).zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
7938 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7939
7940
7941 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with channels(16), the same value that is passed to cr().
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
7950
// Sweeps channels over 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7961
// Sweeps channels over 32, 80, 128, 176 and 224 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7973
// Sweeps channels over 32, 80, 128, 176 and 224 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7985
// Runs every channel count from 1 through 15, i.e. all values below cr(16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 1; channel_count < 16; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
7996
// Runs every channel count from 17 through 31 (strictly between 16 and 32).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8007
// Runs every channel count from 17 through 31 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8019
// Runs every channel count from 17 through 31 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 17; channel_count < 32; ++channel_count) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8031
// width(3) runs for channels 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8043
// width(3) runs for channels 1, 16, 31, 46, 61 and 76, each combined with every
// step value from 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(channel_count)
          .width(3).step(step_size)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
                xnn_init_qu8_conv_minmax_fp32_sse2_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
8058
// Multi-pixel run (width 5) with output_stride(83) while the channel count
// sweeps 1, 16, 31, 46, 61 and 76.
//
// channels() receives the loop variable so that each iteration exercises a
// different channel count; with a hard-coded channels(16) all six iterations
// would repeat one identical configuration. Every swept value stays below the
// output stride of 83.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py — the
// same change presumably belongs in the generator template, otherwise
// regeneration will revert it.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
8071
// width(3) runs for channels 1, 16, 31, 46, 61 and 76 with qmin(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(channel_count)
        .width(3).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8084
// width(3) runs for channels 1, 16, 31, 46, 61 and 76 with qmax(128) applied.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(channel_count)
        .width(3).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8097
// width(3) runs for channels 1, 16, 31, 46, 61 and 76 with
// input_zero_point(255) and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(channel_count).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8111
// width(3) runs for channels 1, 16, 31, 46, 61 and 76 with
// input_zero_point(0) and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channel_count = 1; channel_count <= 80; channel_count += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(channel_count).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
              xnn_init_qu8_conv_minmax_fp32_sse2_params,
              xnn_qu8_requantize_fp32);
  }
}
8125
// Sweeps channels over 32, 80, 128, 176 and 224 with input_offset(304).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t channel_count = 32; channel_count < 256; channel_count += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(channel_count).input_offset(304).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8137
// Sweeps zero_index over 0..8 (one value per kr = 9 entry) and channel counts
// 32, 80, 128, 176, 224, with input_offset = 304 in every run.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .input_offset(304)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8152 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8153
8154
8155 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up16x9 AVX2 mul32 kernel with channels equal to cr = 16;
// every other tester setting keeps its default.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester()
    .cr(16)
    .kr(9)
    .channels(16)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
8164
// Runs the up16x9 AVX2 mul32 kernel for channel counts 32, 80, 128, 176 and
// 224, each a multiple of cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8175
// Same channel sweep as c_div_16 (32, 80, 128, 176, 224) for the up16x9 AVX2
// mul32 kernel, but with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8187
// Same channel sweep as c_div_16 (32, 80, 128, 176, 224) for the up16x9 AVX2
// mul32 kernel, but with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8199
// Runs the up16x9 AVX2 mul32 kernel for every channel count below cr = 16,
// i.e. 1 through 15.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8210
// Runs the up16x9 AVX2 mul32 kernel for channel counts 17 through 31, i.e.
// strictly between one and two multiples of cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8221
// Channel counts 17 through 31 for the up16x9 AVX2 mul32 kernel, with qmin
// set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8233
// Channel counts 17 through 31 for the up16x9 AVX2 mul32 kernel, with qmax
// set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8245
// Runs the up16x9 AVX2 mul32 kernel with width = 3 for channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8257
// For channel counts 1, 16, 31, 46, 61, 76 and every step from 2 to kr = 9,
// runs the up16x9 AVX2 mul32 kernel with width = 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8272
// Runs the up16x9 AVX2 mul32 kernel with width = 5 and output_stride = 83 for
// channel counts 1, 16, 31, 46, 61 and 76.
//
// The loop variable is passed to channels(); previously channels(16) was
// hard-coded, so all six iterations ran the identical configuration. The
// largest channel count in the sweep (76) stays below output_stride (83).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8285
// Runs the up16x9 AVX2 mul32 kernel with width = 3 and qmin = 128 for
// channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8298
// Runs the up16x9 AVX2 mul32 kernel with width = 3 and qmax = 128 for
// channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8311
// Runs the up16x9 AVX2 mul32 kernel with input_zero_point = 255 and
// kernel_zero_point = 0 (width = 3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8325
// Runs the up16x9 AVX2 mul32 kernel with input_zero_point = 0 and
// kernel_zero_point = 255 (width = 3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8339
// Runs the up16x9 AVX2 mul32 kernel with input_offset = 304 for channel
// counts 32, 80, 128, 176 and 224 (all multiples of cr = 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .input_offset(304)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
8351
// Sweeps zero_index over 0..8 (one value per kr = 9 entry) and channel counts
// 32, 80, 128, 176, 224, with input_offset = 304 in every run.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .input_offset(304)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8366 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8367
8368
8369 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up16x9 XOP mul32 kernel with channels equal to cr = 16;
// every other tester setting keeps its default.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester()
    .cr(16)
    .kr(9)
    .channels(16)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
8378
// Runs the up16x9 XOP mul32 kernel for channel counts 32, 80, 128, 176 and
// 224, each a multiple of cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8389
// Same channel sweep as c_div_16 (32, 80, 128, 176, 224) for the up16x9 XOP
// mul32 kernel, but with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8401
// Same channel sweep as c_div_16 (32, 80, 128, 176, 224) for the up16x9 XOP
// mul32 kernel, but with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8413
// Runs the up16x9 XOP mul32 kernel for every channel count below cr = 16,
// i.e. 1 through 15.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8424
// Runs the up16x9 XOP mul32 kernel for channel counts 17 through 31, i.e.
// strictly between one and two multiples of cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8435
// Channel counts 17 through 31 for the up16x9 XOP mul32 kernel, with qmin
// set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8447
// Channel counts 17 through 31 for the up16x9 XOP mul32 kernel, with qmax
// set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8459
// Runs the up16x9 XOP mul32 kernel with width = 3 for channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8471
// For channel counts 1, 16, 31, 46, 61, 76 and every step from 2 to kr = 9,
// runs the up16x9 XOP mul32 kernel with width = 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8486
// Runs the up16x9 XOP mul32 kernel with width = 5 and output_stride = 83 for
// channel counts 1, 16, 31, 46, 61 and 76.
//
// The loop variable is passed to channels(); previously channels(16) was
// hard-coded, so all six iterations ran the identical configuration. The
// largest channel count in the sweep (76) stays below output_stride (83).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8499
// Runs the up16x9 XOP mul32 kernel with width = 3 and qmin = 128 for
// channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8512
// Runs the up16x9 XOP mul32 kernel with width = 3 and qmax = 128 for
// channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8525
// Runs the up16x9 XOP mul32 kernel with input_zero_point = 255 and
// kernel_zero_point = 0 (width = 3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8539
// Runs the up16x9 XOP mul32 kernel with input_zero_point = 0 and
// kernel_zero_point = 255 (width = 3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8553
// Runs the up16x9 XOP mul32 kernel with input_offset = 304 for channel
// counts 32, 80, 128, 176 and 224 (all multiples of cr = 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(num_channels)
      .input_offset(304)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8565
// Sweeps zero_index over 0..8 (one value per kr = 9 entry) and channel counts
// 32, 80, 128, 176, 224, with input_offset = 304 in every run.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(num_channels)
        .input_offset(304)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8580 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8581
8582
8583 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up16x25 AVX mul16 kernel with channels equal to cr = 16;
// every other tester setting keeps its default.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester()
    .cr(16)
    .kr(25)
    .channels(16)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
8592
// Runs the up16x25 AVX mul16 kernel for channel counts 32, 80, 128, 176 and
// 224, each a multiple of cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8603
// Same channel sweep as c_div_16 (32, 80, 128, 176, 224) for the up16x25 AVX
// mul16 kernel, but with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8615
// Same channel sweep as c_div_16 (32, 80, 128, 176, 224) for the up16x25 AVX
// mul16 kernel, but with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8627
// Runs the up16x25 AVX mul16 kernel for every channel count below cr = 16,
// i.e. 1 through 15.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8638
// Runs the up16x25 AVX mul16 kernel for channel counts 17 through 31, i.e.
// strictly between one and two multiples of cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8649
// Channel counts 17 through 31 for the up16x25 AVX mul16 kernel, with qmin
// set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8661
// Channel counts 17 through 31 for the up16x25 AVX mul16 kernel, with qmax
// set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8673
// Runs the up16x25 AVX mul16 kernel with width = 3 for channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8685
// For channel counts 1, 16, 31, 46, 61, 76 and every step from 2 to kr = 25,
// runs the up16x25 AVX mul16 kernel with width = 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8700
// Runs the up16x25 AVX mul16 kernel with width = 5 and output_stride = 83 for
// channel counts 1, 16, 31, 46, 61 and 76.
//
// The loop variable is passed to channels(); previously channels(16) was
// hard-coded, so all six iterations ran the identical configuration. The
// largest channel count in the sweep (76) stays below output_stride (83).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8713
// Runs the up16x25 AVX mul16 kernel with width = 3 and qmin = 128 for
// channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8726
// Runs the up16x25 AVX mul16 kernel with width = 3 and qmax = 128 for
// channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8739
// Runs the up16x25 AVX mul16 kernel with input_zero_point = 255 and
// kernel_zero_point = 0 (width = 3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8753
// Runs the up16x25 AVX mul16 kernel with input_zero_point = 0 and
// kernel_zero_point = 255 (width = 3) for channel counts 1, 16, 31, 46, 61, 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8767
// Runs the up16x25 AVX mul16 kernel with input_offset = 304 for channel
// counts 32, 80, 128, 176 and 224 (all multiples of cr = 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .input_offset(304)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8779
// Sweeps zero_index over 0..24 (one value per kr = 25 entry) and channel
// counts 32, 80, 128, 176, 224, with input_offset = 304 in every run.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL16, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(num_channels)
        .input_offset(304)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul16,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8794 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8795
8796
8797 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up16x25 AVX mul32 kernel with channels equal to cr = 16;
// every other tester setting keeps its default.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX;
  DWConvMicrokernelTester()
    .cr(16)
    .kr(25)
    .channels(16)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
8806
// Runs the up16x25 AVX mul32 kernel for channel counts 32, 80, 128, 176 and
// 224, each a multiple of cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8817
// Same channel sweep as c_div_16 (32, 80, 128, 176, 224) for the up16x25 AVX
// mul32 kernel, but with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8829
// Same channel sweep as c_div_16 (32, 80, 128, 176, 224) for the up16x25 AVX
// mul32 kernel, but with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8841
// Runs the up16x25 AVX mul32 kernel for every channel count below cr = 16,
// i.e. 1 through 15.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8852
// Runs the up16x25 AVX mul32 kernel for channel counts 17 through 31, i.e.
// strictly between one and two multiples of cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8863
// Channel counts 17 through 31 for the up16x25 AVX mul32 kernel, with qmin
// set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8875
// Channel counts 17 through 31 for the up16x25 AVX mul32 kernel, with qmax
// set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8887
// Runs the up16x25 AVX mul32 kernel with width = 3 for channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8899
// For channel counts 1, 16, 31, 46, 61, 76 and every step from 2 to kr = 25,
// runs the up16x25 AVX mul32 kernel with width = 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(num_channels)
        .width(3)
        .step(step_value)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
8914
// Runs the up16x25 AVX mul32 kernel with width = 5 and output_stride = 83 for
// channel counts 1, 16, 31, 46, 61 and 76.
//
// The loop variable is passed to channels(); previously channels(16) was
// hard-coded, so all six iterations ran the identical configuration. The
// largest channel count in the sweep (76) stays below output_stride (83).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8927
// Runs the up16x25 AVX mul32 kernel with width = 3 and qmin = 128 for
// channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8940
// Runs the up16x25 AVX mul32 kernel with width = 3 and qmax = 128 for
// channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(num_channels)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8953
// Width-3 runs with input_zero_point 255 and kernel_zero_point 0 for channel
// counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8967
// Width-3 runs with input_zero_point 0 and kernel_zero_point 255 for channel
// counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8981
// Runs with input_offset 304 for channel counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
8993
// For every zero_index from 0 to 24, runs with input_offset 304 over channel
// counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels);
      tester.input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9008 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9009
9010
9011 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels equal to the cr value (16), kr 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
9020
// Channel counts 32, 80, 128, 176 and 224 - all multiples of 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9031
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9043
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9055
// Every channel count from 1 to 15, i.e. below the cr value of 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9066
// Every channel count from 17 to 31, i.e. strictly between 16 and 32.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9077
// Every channel count from 17 to 31 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9089
// Every channel count from 17 to 31 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9101
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9113
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, repeated for
// every step value from 2 up to and including 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t cur_step = 2; cur_step <= 25; ++cur_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).width(3).step(cur_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9128
// Width-5 runs with output_stride 83 for channel counts 1, 16, 31, 46, 61
// and 76.
//
// The loop variable is forwarded to channels(); passing the constant 16 made
// every iteration run the identical configuration. The stride of 83 is larger
// than the biggest channel count visited (76).
// NOTE(review): this file is auto-generated; the same change should be made
// in the generator so it survives regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
9141
// Width-3 runs with qmin set to 128 for channel counts 1, 16, 31, 46, 61
// and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9154
// Width-3 runs with qmax set to 128 for channel counts 1, 16, 31, 46, 61
// and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9167
// Width-3 runs with input_zero_point 255 and kernel_zero_point 0 for channel
// counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9181
// Width-3 runs with input_zero_point 0 and kernel_zero_point 255 for channel
// counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9195
// Runs with input_offset 304 for channel counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9207
// For every zero_index from 0 to 24, runs with input_offset 304 over channel
// counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels);
      tester.input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9222 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9223
9224
9225 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels equal to the cr value (16), kr 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_XOP;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
      xnn_init_qu8_conv_minmax_fp32_sse2_params,
      xnn_qu8_requantize_fp32);
}
9234
// Channel counts 32, 80, 128, 176 and 224 - all multiples of 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9245
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmin 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9257
// Channel counts 32, 80, 128, 176 and 224 (multiples of 16) with qmax 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9269
// Every channel count from 1 to 15, i.e. below the cr value of 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9280
// Every channel count from 17 to 31, i.e. strictly between 16 and 32.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9291
// Every channel count from 17 to 31 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9303
// Every channel count from 17 to 31 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9315
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9327
// Width-3 runs for channel counts 1, 16, 31, 46, 61 and 76, repeated for
// every step value from 2 up to and including 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t cur_step = 2; cur_step <= 25; ++cur_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels).width(3).step(cur_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9342
// Width-5 runs with output_stride 83 for channel counts 1, 16, 31, 46, 61
// and 76.
//
// The loop variable is forwarded to channels(); passing the constant 16 made
// every iteration run the identical configuration. The stride of 83 is larger
// than the biggest channel count visited (76).
// NOTE(review): this file is auto-generated; the same change should be made
// in the generator so it survives regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_XOP;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
  }
}
9355
// Width-3 runs with qmin set to 128 for channel counts 1, 16, 31, 46, 61
// and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9368
// Width-3 runs with qmax set to 128 for channel counts 1, 16, 31, 46, 61
// and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9381
// Width-3 runs with input_zero_point 255 and kernel_zero_point 0 for channel
// counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9395
// Width-3 runs with input_zero_point 0 and kernel_zero_point 255 for channel
// counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_XOP;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9409
// Runs with input_offset 304 for channel counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, input_offset) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(num_channels).input_offset(304);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
        xnn_init_qu8_conv_minmax_fp32_sse2_params,
        xnn_qu8_requantize_fp32);
  }
}
9421
// For every zero_index from 0 to 24, runs with input_offset 304 over channel
// counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__XOP_MUL32, zero) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(num_channels);
      tester.input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__xop_mul32,
          xnn_init_qu8_conv_minmax_fp32_sse2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9436 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9437
9438
9439 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels equal to the cr value (32), kr 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(32).kr(9).channels(32);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
9448
// Channel counts 64, 160, 256, 352 and 448 - all multiples of 32.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9459
// Channel counts 64, 160, 256, 352 and 448 (multiples of 32) with qmin 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9471
// Channel counts 64, 160, 256, 352 and 448 (multiples of 32) with qmax 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9483
// Every channel count from 1 to 31, i.e. below the cr value of 32.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9494
// Every channel count from 33 to 63, i.e. strictly between 32 and 64.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9505
// Every channel count from 33 to 63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9517
// Every channel count from 33 to 63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9529
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9541
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156, repeated for
// every step value from 2 up to and including 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t cur_step = 2; cur_step <= 9; ++cur_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(num_channels).width(3).step(cur_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9556
// Width-5 runs with output_stride 163 for channel counts 1, 32, 63, 94, 125
// and 156.
//
// The loop variable is forwarded to channels(); passing the constant 32 made
// every iteration run the identical configuration. The stride of 163 is
// larger than the biggest channel count visited (156).
// NOTE(review): this file is auto-generated; the same change should be made
// in the generator so it survives regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
9569
// Width-3 runs with qmin set to 128 for channel counts 1, 32, 63, 94, 125
// and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9582
// Width-3 runs with qmax set to 128 for channel counts 1, 32, 63, 94, 125
// and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9595
// Width-3 runs with input_zero_point 255 and kernel_zero_point 0 for channel
// counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9609
// Width-3 runs with input_zero_point 0 and kernel_zero_point 255 for channel
// counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9623
// Runs with input_offset 592 for channel counts 64, 160, 256, 352 and 448.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(num_channels).input_offset(592);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9635
// For every zero_index from 0 to 8, runs with input_offset 592 over channel
// counts 64, 160, 256, 352 and 448.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(num_channels);
      tester.input_offset(592).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9650 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9651
9652
9653 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: channels equal to the cr value (32), kr 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  DWConvMicrokernelTester tester{};
  tester.cr(32).kr(25).channels(32);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
      xnn_init_qu8_conv_minmax_fp32_avx2_params,
      xnn_qu8_requantize_fp32);
}
9662
// Channel counts 64, 160, 256, 352 and 448 - all multiples of 32.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9673
// Channel counts 64, 160, 256, 352 and 448 (multiples of 32) with qmin 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9685
// Channel counts 64, 160, 256, 352 and 448 (multiples of 32) with qmax 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 64; num_channels < 512; num_channels += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9697
// Every channel count from 1 to 31, i.e. below the cr value of 32.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 1; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9708
// Every channel count from 33 to 63, i.e. strictly between 32 and 64.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9719
// Every channel count from 33 to 63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(num_channels).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9731
// Every channel count from 33 to 63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t num_channels = 33; num_channels < 64; ++num_channels) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(num_channels).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9743
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9755
// Width-3 runs for channel counts 1, 32, 63, 94, 125 and 156, repeated for
// every step value from 2 up to and including 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t num_channels = 1; num_channels <= 160; num_channels += 31) {
    for (size_t cur_step = 2; cur_step <= 25; ++cur_step) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(25).channels(num_channels).width(3).step(cur_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9770
// Runs the up32x25 AVX2 kernel with width 5 and an output stride of 163 for
// channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      // Feed the loop variable to the tester. A hard-coded 32 here made every
      // iteration repeat the same configuration. The stride of 163 is larger
      // than the biggest channel count iterated (156).
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
  }
}
9783
// Runs the up32x25 AVX2 kernel with width 3 and qmin 128 for channel counts
// 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9796
// Runs the up32x25 AVX2 kernel with width 3 and qmax 128 for channel counts
// 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9809
// Runs the up32x25 AVX2 kernel with width 3, input zero point 255 and kernel
// zero point 0, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9823
// Runs the up32x25 AVX2 kernel with width 3, input zero point 0 and kernel
// zero point 255, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9837
// Runs the up32x25 AVX2 kernel with an input offset of 592 for channel counts
// 64, 160, 256, 352 and 448.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c).input_offset(592);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx2_params,
        xnn_qu8_requantize_fp32);
  }
}
9849
// Runs the up32x25 AVX2 kernel with input offset 592 for each zero index in
// [0, 24], and for channel counts 64, 160, 256, 352 and 448 under each index.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(25).channels(c).input_offset(592).zero_index(zi);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx2_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9864 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9865
9866
9867 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up16x9 AVX512SKX kernel with channels equal to cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(9).channels(16);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
9876
// Runs the up16x9 AVX512SKX kernel for channel counts 32, 80, 128, 176 and 224
// (all multiples of 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
9887
// Runs the up16x9 AVX512SKX kernel with qmin 128 for channel counts
// 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
9899
// Runs the up16x9 AVX512SKX kernel with qmax 128 for channel counts
// 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
9911
// Runs the up16x9 AVX512SKX kernel for every channel count in [1, 15].
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
9922
// Runs the up16x9 AVX512SKX kernel for every channel count in [17, 31].
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
9933
// Runs the up16x9 AVX512SKX kernel with qmin 128 for every channel count in
// [17, 31].
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
9945
// Runs the up16x9 AVX512SKX kernel with qmax 128 for every channel count in
// [17, 31].
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
9957
// Runs the up16x9 AVX512SKX kernel with width 3 for channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
9969
// Runs the up16x9 AVX512SKX kernel with width 3 for channel counts
// 1, 16, 31, 46, 61, 76, trying every step value from 2 through 9 for each.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
9984
// Runs the up16x9 AVX512SKX kernel with width 5 and an output stride of 83 for
// channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      // Feed the loop variable to the tester. A hard-coded 16 here made every
      // iteration repeat the same configuration. The stride of 83 is larger
      // than the biggest channel count iterated (76).
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
9997
// Runs the up16x9 AVX512SKX kernel with width 3 and qmin 128 for channel
// counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10010
// Runs the up16x9 AVX512SKX kernel with width 3 and qmax 128 for channel
// counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10023
// Runs the up16x9 AVX512SKX kernel with width 3, input zero point 255 and
// kernel zero point 0, for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10037
// Runs the up16x9 AVX512SKX kernel with width 3, input zero point 0 and
// kernel zero point 255, for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10051
// Runs the up16x9 AVX512SKX kernel with an input offset of 304 for channel
// counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(9).channels(c).input_offset(304);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10063
// Runs the up16x9 AVX512SKX kernel with input offset 304 for each zero index
// in [0, 8], and for channel counts 32, 80, 128, 176 and 224 under each index.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(9).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10078 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10079
10080
10081 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up16x25 AVX512SKX kernel with channels equal to cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_eq_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester tester{};
  tester.cr(16).kr(25).channels(16);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
10090
// Runs the up16x25 AVX512SKX kernel for channel counts 32, 80, 128, 176 and
// 224 (all multiples of 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10101
// Runs the up16x25 AVX512SKX kernel with qmin 128 for channel counts
// 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10113
// Runs the up16x25 AVX512SKX kernel with qmax 128 for channel counts
// 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10125
// Runs the up16x25 AVX512SKX kernel for every channel count in [1, 15].
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_lt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10136
// Runs the up16x25 AVX512SKX kernel for every channel count in [17, 31].
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10147
// Runs the up16x25 AVX512SKX kernel with qmin 128 for every channel count in
// [17, 31].
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10159
// Runs the up16x25 AVX512SKX kernel with qmax 128 for every channel count in
// [17, 31].
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10171
// Runs the up16x25 AVX512SKX kernel with width 3 for channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10183
// Runs the up16x25 AVX512SKX kernel with width 3 for channel counts
// 1, 16, 31, 46, 61, 76, trying every step value from 2 through 25 for each.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10198
// Runs the up16x25 AVX512SKX kernel with width 5 and an output stride of 83
// for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      // Feed the loop variable to the tester. A hard-coded 16 here made every
      // iteration repeat the same configuration. The stride of 83 is larger
      // than the biggest channel count iterated (76).
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
10211
// Runs the up16x25 AVX512SKX kernel with width 3 and qmin 128 for channel
// counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10224
// Runs the up16x25 AVX512SKX kernel with width 3 and qmax 128 for channel
// counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10237
// Runs the up16x25 AVX512SKX kernel with width 3, input zero point 255 and
// kernel zero point 0, for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10251
// Runs the up16x25 AVX512SKX kernel with width 3, input zero point 0 and
// kernel zero point 255, for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10265
// Runs the up16x25 AVX512SKX kernel with an input offset of 304 for channel
// counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester{};
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10277
// Runs the up16x25 AVX512SKX kernel with input offset 304 for each zero index
// in [0, 24], and for channel counts 32, 80, 128, 176 and 224 under each index.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester{};
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zi);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__avx512skx_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10292 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10293
10294
10295 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up32x9 AVX512SKX kernel with channels equal to cr (32).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester tester{};
  tester.cr(32).kr(9).channels(32);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
10304
// Runs the up32x9 AVX512SKX kernel for channel counts 64, 160, 256, 352 and
// 448 (all multiples of 32).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10315
// Runs the up32x9 AVX512SKX kernel with qmin 128 for channel counts
// 64, 160, 256, 352 and 448.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10327
// Runs the up32x9 AVX512SKX kernel with qmax 128 for channel counts
// 64, 160, 256, 352 and 448.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10339
// Runs the up32x9 AVX512SKX kernel for every channel count in [1, 31].
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10350
// Runs the up32x9 AVX512SKX kernel for every channel count in [33, 63].
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10361
// Runs the up32x9 AVX512SKX kernel with qmin 128 for every channel count in
// [33, 63].
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10373
// Runs the up32x9 AVX512SKX kernel with qmax 128 for every channel count in
// [33, 63].
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10385
// Runs the up32x9 AVX512SKX kernel with width 3 for channel counts
// 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10397
// Runs the up32x9 AVX512SKX kernel with width 3 for channel counts
// 1, 32, 63, 94, 125, 156, trying every step value from 2 through 9 for each.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(c).width(3).step(s);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10412
// Runs the up32x9 AVX512SKX kernel with width 5 and an output stride of 163
// for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      // Feed the loop variable to the tester. A hard-coded 32 here made every
      // iteration repeat the same configuration. The stride of 163 is larger
      // than the biggest channel count iterated (156).
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
10425
// Runs the up32x9 AVX512SKX kernel with width 3 and qmin 128 for channel
// counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10438
// Runs the up32x9 AVX512SKX kernel with width 3 and qmax 128 for channel
// counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10451
// Runs the up32x9 AVX512SKX kernel with width 3, input zero point 255 and
// kernel zero point 0, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10465
// Runs the up32x9 AVX512SKX kernel with width 3, input zero point 0 and
// kernel zero point 255, for channel counts 1, 32, 63, 94, 125 and 156.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10479
// Runs the up32x9 AVX512SKX kernel with an input offset of 592 for channel
// counts 64, 160, 256, 352 and 448.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(9).channels(c).input_offset(592);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10491
// Runs the up32x9 AVX512SKX kernel with input offset 592 for each zero index
// in [0, 8], and for channel counts 64, 160, 256, 352 and 448 under each index.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X9__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester tester{};
      tester.cr(32).kr(9).channels(c).input_offset(592).zero_index(zi);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10506 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10507
10508
10509 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the up32x25 AVX512SKX kernel with channels equal to cr (32).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_eq_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  DWConvMicrokernelTester tester{};
  tester.cr(32).kr(25).channels(32);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
      xnn_init_qu8_conv_minmax_fp32_avx512_params,
      xnn_qu8_requantize_fp32);
}
10518
// Runs the up32x25 AVX512SKX kernel for channel counts 64, 160, 256, 352 and
// 448 (all multiples of 32).
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester tester{};
    tester.cr(32).kr(25).channels(c);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10529
// Sweeps channel counts 64, 160, 256, 352 and 448 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10541
// Sweeps channel counts 64, 160, 256, 352 and 448 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10553
// Sweeps every channel count from 1 through 31, i.e. below cr = 32.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_lt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 1; c < 32; c++) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10564
// Sweeps every channel count from 33 through 63, i.e. above cr = 32 and
// below 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10575
// Sweeps every channel count from 33 through 63 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10587
// Sweeps every channel count from 33 through 63 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 33; c < 64; c++) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10599
// Sweeps channel counts 1, 32, 63, 94, 125 and 156 with width set to 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10611
// For channel counts 1, 32, 63, 94, 125 and 156, runs width 3 with every
// step value from 2 through 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_step) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t px_step = 2; px_step <= 25; px_step++) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).step(px_step).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10626
// Multi-pixel run (width 5) with output_stride set to 163, swept over channel
// counts 1, 32, 63, 94, 125 and 156.
// channels() receives the loop variable so that each iteration exercises a
// different channel count (a fixed value would just repeat one configuration);
// every swept count stays below the output_stride of 163.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
  }
}
10639
// Sweeps channel counts 1, 32, 63, 94, 125 and 156 with width 3 and qmin 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10652
// Sweeps channel counts 1, 32, 63, 94, 125 and 156 with width 3 and qmax 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, multipixel_with_qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10665
// Sweeps channel counts 1, 32, 63, 94, 125 and 156 with width 3, using
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, input_zero_point_only) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3)
        .input_zero_point(255)
        .kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
  }
}
10679
// Sweeps channel counts 1, 32, 63, 94, 125 and 156 with width 3, using
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, kernel_zero_point_only) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).width(3)
        .input_zero_point(0)
        .kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
              xnn_init_qu8_conv_minmax_fp32_avx512_params,
              xnn_qu8_requantize_fp32);
  }
}
10693
// Sweeps channel counts 64, 160, 256, 352 and 448 with the tester's
// input_offset set to 592.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, input_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
        xnn_init_qu8_conv_minmax_fp32_avx512_params,
        xnn_qu8_requantize_fp32);
  }
}
10705
// For every zero_index in [0, 25), sweeps channel counts 64, 160, 256, 352
// and 448 with the tester's input_offset set to 592.
TEST(QU8_DWCONV_MINMAX_FP32_UP32X25__AVX512SKX_MUL32, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 64; c < 512; c += 96) {
      DWConvMicrokernelTester().cr(32).kr(25).channels(c).input_offset(592).zero_index(zi).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx512skx_mul32,
          xnn_init_qu8_conv_minmax_fp32_avx512_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10720 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10721
10722
10723 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with the channel count equal to cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_eq_8) {
  DWConvMicrokernelTester().cr(8).kr(9).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
10731
// Sweeps channel counts 16, 40, 64, 88 and 112, all multiples of cr = 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10741
// Sweeps channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10752
// Sweeps channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10763
// Sweeps every channel count from 1 through 7, i.e. below cr = 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_lt_8) {
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10773
// Sweeps every channel count from 9 through 15, i.e. above cr = 8 and
// below 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8) {
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10783
// Sweeps every channel count from 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10794
// Sweeps every channel count from 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10805
// Sweeps channel counts 1, 8, 15, 22, 29 and 36 with width set to 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10816
// For channel counts 1, 8, 15, 22, 29 and 36, runs width 3 with every step
// value from 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t px_step = 2; px_step <= 9; px_step++) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).step(px_step).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10830
// Multi-pixel run (width 5) with output_stride set to 43, swept over channel
// counts 1, 8, 15, 22, 29 and 36.
// channels() receives the loop variable so that each iteration exercises a
// different channel count (a fixed value would just repeat one configuration);
// every swept count stays below the output_stride of 43.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
10842
// Sweeps channel counts 1, 8, 15, 22, 29 and 36 with width 3 and qmin 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10854
// Sweeps channel counts 1, 8, 15, 22, 29 and 36 with width 3 and qmax 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10866
// Sweeps channel counts 1, 8, 15, 22, 29 and 36 with width 3, using
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, input_zero_point_only) {
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
        .input_zero_point(255)
        .kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
10879
// Sweeps channel counts 1, 8, 15, 22, 29 and 36 with width 3, using
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, kernel_zero_point_only) {
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).width(3)
        .input_zero_point(0)
        .kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
10892
// Sweeps channel counts 16, 40, 64, 88 and 112 with the tester's
// input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, input_offset) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10903
// For every zero_index in [0, 9), sweeps channel counts 16, 40, 64, 88 and
// 112 with the tester's input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X9__WASMSIMD_MUL16, zero) {
  for (uint32_t zi = 0; zi < 9; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(c).input_offset(176).zero_index(zi).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
10917 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10918
10919
10920 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with the channel count equal to cr (8).
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_eq_8) {
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
10928
// Sweeps channel counts 16, 40, 64, 88 and 112, all multiples of cr = 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10938
// Sweeps channel counts 16, 40, 64, 88 and 112 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmin) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10949
// Sweeps channel counts 16, 40, 64, 88 and 112 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmax) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10960
// Sweeps every channel count from 1 through 7, i.e. below cr = 8.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_lt_8) {
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10970
// Sweeps every channel count from 9 through 15, i.e. above cr = 8 and
// below 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8) {
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10980
// Sweeps every channel count from 9 through 15 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmin) {
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
10991
// Sweeps every channel count from 9 through 15 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmax) {
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11002
// Sweeps channel counts 1, 8, 15, 22, 29 and 36 with width set to 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11013
// For channel counts 1, 8, 15, 22, 29 and 36, runs width 3 with every step
// value from 2 through 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t px_step = 2; px_step <= 25; px_step++) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).step(px_step).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11027
// Multi-pixel run (width 5) with output_stride set to 43, swept over channel
// counts 1, 8, 15, 22, 29 and 36.
// channels() receives the loop variable so that each iteration exercises a
// different channel count (a fixed value would just repeat one configuration);
// every swept count stays below the output_stride of 43.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
11039
// Sweeps channel counts 1, 8, 15, 22, 29 and 36 with width 3 and qmin 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11051
// Sweeps channel counts 1, 8, 15, 22, 29 and 36 with width 3 and qmax 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11063
// Sweeps channel counts 1, 8, 15, 22, 29 and 36 with width 3, using
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, input_zero_point_only) {
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
        .input_zero_point(255)
        .kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
11076
// Sweeps channel counts 1, 8, 15, 22, 29 and 36 with width 3, using
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, kernel_zero_point_only) {
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).width(3)
        .input_zero_point(0)
        .kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
11089
// Sweeps channel counts 16, 40, 64, 88 and 112 with the tester's
// input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, input_offset) {
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11100
// For every zero_index in [0, 25), sweeps channel counts 16, 40, 64, 88 and
// 112 with the tester's input_offset set to 176.
TEST(QU8_DWCONV_MINMAX_FP32_UP8X25__WASMSIMD_MUL16, zero) {
  for (uint32_t zi = 0; zi < 25; zi++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(c).input_offset(176).zero_index(zi).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up8x25__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11114 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11115
11116
11117 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with the channel count equal to cr (16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_eq_16) {
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
11125
// Sweeps channel counts 32, 80, 128, 176 and 224, all multiples of cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11135
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11146
// Sweeps channel counts 32, 80, 128, 176 and 224 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11157
// Sweeps every channel count from 1 through 15, i.e. below cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_lt_16) {
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11167
// Sweeps every channel count from 17 through 31, i.e. above cr = 16 and
// below 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16) {
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11177
// Sweeps every channel count from 17 through 31 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11188
// Sweeps every channel count from 17 through 31 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11199
// Sweeps channel counts 1, 16, 31, 46, 61 and 76 with width set to 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11210
// For channel counts 1, 16, 31, 46, 61 and 76, runs width 3 with every step
// value from 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t px_step = 2; px_step <= 9; px_step++) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).step(px_step).Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11224
// Multi-pixel run (width 5) with output_stride set to 83, swept over channel
// counts 1, 16, 31, 46, 61 and 76.
// channels() receives the loop variable so that each iteration exercises a
// different channel count (a fixed value would just repeat one configuration);
// every swept count stays below the output_stride of 83.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
11236
// Sweeps channel counts 1, 16, 31, 46, 61 and 76 with width 3 and qmin 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmin(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11248
// Sweeps channel counts 1, 16, 31, 46, 61 and 76 with width 3 and qmax 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3).qmax(128).Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11260
// Sweeps channel counts 1, 16, 31, 46, 61 and 76 with width 3, using
// input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, input_zero_point_only) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3)
        .input_zero_point(255)
        .kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
11273
// Sweeps channel counts 1, 16, 31, 46, 61 and 76 with width 3, using
// input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, kernel_zero_point_only) {
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(c).width(3)
        .input_zero_point(0)
        .kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
              xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
              xnn_qu8_requantize_fp32);
  }
}
11286
// Runs the up16x9 WASM SIMD mul16 kernel with an input offset of 304 for
// channel counts 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, input_offset) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(9);
    tester.channels(num_channels);
    tester.input_offset(304);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11297
// For every zero_index 0..8, runs the up16x9 WASM SIMD mul16 kernel with an
// input offset of 304 for channel counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X9__WASMSIMD_MUL16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(9);
      tester.channels(num_channels);
      tester.input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11311 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11312
11313
11314 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up16x25 WASM SIMD mul16 kernel with exactly 16 channels
// (channels equal to cr).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_eq_16) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(16).kr(25);
  tester.channels(16);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
11322
// Runs the up16x25 WASM SIMD mul16 kernel for channel counts 32, 80, 128, 176
// and 224 (all multiples of 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11332
// Runs the up16x25 WASM SIMD mul16 kernel with qmin set to 128 for channel
// counts 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmin) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11343
// Runs the up16x25 WASM SIMD mul16 kernel with qmax set to 128 for channel
// counts 32, 80, 128, 176 and 224 (all multiples of 16).
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmax) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11354
// Runs the up16x25 WASM SIMD mul16 kernel for every channel count from 1 to
// 15, i.e. all counts below cr = 16.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_lt_16) {
  for (uint32_t num_channels = 1; num_channels < 16; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11364
// Runs the up16x25 WASM SIMD mul16 kernel for every channel count from 17 to
// 31, i.e. strictly between cr = 16 and 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16) {
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11374
// Runs the up16x25 WASM SIMD mul16 kernel with qmin set to 128 for every
// channel count from 17 to 31.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmin) {
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11385
// Runs the up16x25 WASM SIMD mul16 kernel with qmax set to 128 for every
// channel count from 17 to 31.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmax) {
  for (uint32_t num_channels = 17; num_channels < 32; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11396
// Width-3 runs of the up16x25 WASM SIMD mul16 kernel for channel counts
// 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11407
// Width-3 runs of the up16x25 WASM SIMD mul16 kernel for channel counts
// 1, 16, ..., 76, each combined with every step value from 2 to 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25);
      tester.channels(num_channels).width(3);
      tester.step(pixel_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11421
// Width-5 runs of the up16x25 WASM SIMD mul16 kernel with an explicit output
// stride of 83, for channel counts 1, 16, 31, 46, 61 and 76.
//
// channels() receives the loop variable so that each iteration exercises a
// different channel count; a constant here would make the loop repeat a
// single configuration. The largest swept count (76) is below the output
// stride of 83 (presumably the stride must be at least the channel count;
// confirm against DWConvMicrokernelTester).
//
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or regeneration will revert it.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
11433
// Width-3 runs of the up16x25 WASM SIMD mul16 kernel with qmin set to 128,
// for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels).width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11445
// Width-3 runs of the up16x25 WASM SIMD mul16 kernel with qmax set to 128,
// for channel counts 1, 16, 31, 46, 61 and 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels).width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11457
// Width-3 runs of the up16x25 WASM SIMD mul16 kernel with the input zero
// point at 255 and the kernel zero point at 0, for channels 1, 16, ..., 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, input_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11470
// Width-3 runs of the up16x25 WASM SIMD mul16 kernel with the input zero
// point at 0 and the kernel zero point at 255, for channels 1, 16, ..., 76.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, kernel_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11483
// Runs the up16x25 WASM SIMD mul16 kernel with an input offset of 304 for
// channel counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, input_offset) {
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(16).kr(25);
    tester.channels(num_channels);
    tester.input_offset(304);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11494
// For every zero_index 0..24, runs the up16x25 WASM SIMD mul16 kernel with an
// input offset of 304 for channel counts 32, 80, 128, 176 and 224.
TEST(QU8_DWCONV_MINMAX_FP32_UP16X25__WASMSIMD_MUL16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(16).kr(25);
      tester.channels(num_channels);
      tester.input_offset(304).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up16x25__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11508 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11509
11510
11511 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up24x9 WASM SIMD mul16 kernel with exactly 24 channels
// (channels equal to cr).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_eq_24) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(9);
  tester.channels(24);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
11519
// Runs the up24x9 WASM SIMD mul16 kernel for channel counts 48, 120, 192, 264
// and 336 (all multiples of 24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24) {
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11529
// Runs the up24x9 WASM SIMD mul16 kernel with qmin set to 128 for channel
// counts 48, 120, 192, 264 and 336 (all multiples of 24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11540
// Runs the up24x9 WASM SIMD mul16 kernel with qmax set to 128 for channel
// counts 48, 120, 192, 264 and 336 (all multiples of 24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11551
// Runs the up24x9 WASM SIMD mul16 kernel for every channel count from 1 to
// 23, i.e. all counts below cr = 24.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_lt_24) {
  for (uint32_t num_channels = 1; num_channels < 24; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11561
// Runs the up24x9 WASM SIMD mul16 kernel for every channel count from 25 to
// 47, i.e. strictly between cr = 24 and 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24) {
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11571
// Runs the up24x9 WASM SIMD mul16 kernel with qmin set to 128 for every
// channel count from 25 to 47.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11582
// Runs the up24x9 WASM SIMD mul16 kernel with qmax set to 128 for every
// channel count from 25 to 47.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11593
// Width-3 runs of the up24x9 WASM SIMD mul16 kernel for channel counts
// 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11604
// Width-3 runs of the up24x9 WASM SIMD mul16 kernel for channel counts
// 1, 24, ..., 116, each combined with every step value from 2 to 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 9; pixel_step++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9);
      tester.channels(num_channels).width(3);
      tester.step(pixel_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11618
// Width-5 runs of the up24x9 WASM SIMD mul16 kernel with an explicit output
// stride of 127, for channel counts 1, 24, 47, 70, 93 and 116.
//
// channels() receives the loop variable so that each iteration exercises a
// different channel count; a constant here would make the loop repeat a
// single configuration. The largest swept count (116) is below the output
// stride of 127 (presumably the stride must be at least the channel count;
// confirm against DWConvMicrokernelTester).
//
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or regeneration will revert it.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
11630
// Width-3 runs of the up24x9 WASM SIMD mul16 kernel with qmin set to 128,
// for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11642
// Width-3 runs of the up24x9 WASM SIMD mul16 kernel with qmax set to 128,
// for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11654
// Width-3 runs of the up24x9 WASM SIMD mul16 kernel with the input zero point
// at 255 and the kernel zero point at 0, for channel counts 1, 24, ..., 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, input_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11667
// Width-3 runs of the up24x9 WASM SIMD mul16 kernel with the input zero point
// at 0 and the kernel zero point at 255, for channel counts 1, 24, ..., 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, kernel_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11680
// Runs the up24x9 WASM SIMD mul16 kernel with an input offset of 464 for
// channel counts 48, 120, 192, 264 and 336.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, input_offset) {
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(9);
    tester.channels(num_channels);
    tester.input_offset(464);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11691
// For every zero_index 0..8, runs the up24x9 WASM SIMD mul16 kernel with an
// input offset of 464 for channel counts 48, 120, 192, 264 and 336.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X9__WASMSIMD_MUL16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; zero_idx++) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(9);
      tester.channels(num_channels);
      tester.input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up24x9__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11705 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11706
11707
11708 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up24x25 WASM SIMD mul16 kernel with exactly 24 channels
// (channels equal to cr).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_eq_24) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(24).kr(25);
  tester.channels(24);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
      xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
      xnn_qu8_requantize_fp32);
}
11716
// Runs the up24x25 WASM SIMD mul16 kernel for channel counts 48, 120, 192,
// 264 and 336 (all multiples of 24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24) {
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11726
// Runs the up24x25 WASM SIMD mul16 kernel with qmin set to 128 for channel
// counts 48, 120, 192, 264 and 336 (all multiples of 24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmin) {
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11737
// Runs the up24x25 WASM SIMD mul16 kernel with qmax set to 128 for channel
// counts 48, 120, 192, 264 and 336 (all multiples of 24).
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmax) {
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11748
// Runs the up24x25 WASM SIMD mul16 kernel for every channel count from 1 to
// 23, i.e. all counts below cr = 24.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_lt_24) {
  for (uint32_t num_channels = 1; num_channels < 24; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11758
// Runs the up24x25 WASM SIMD mul16 kernel for every channel count from 25 to
// 47, i.e. strictly between cr = 24 and 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24) {
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11768
// Runs the up24x25 WASM SIMD mul16 kernel with qmin set to 128 for every
// channel count from 25 to 47.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmin) {
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11779
// Runs the up24x25 WASM SIMD mul16 kernel with qmax set to 128 for every
// channel count from 25 to 47.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmax) {
  for (uint32_t num_channels = 25; num_channels < 48; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11790
// Width-3 runs of the up24x25 WASM SIMD mul16 kernel for channel counts
// 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11801
// Width-3 runs of the up24x25 WASM SIMD mul16 kernel for channel counts
// 1, 24, ..., 116, each combined with every step value from 2 to 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    for (size_t pixel_step = 2; pixel_step <= 25; pixel_step++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(25);
      tester.channels(num_channels).width(3);
      tester.step(pixel_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11815
// Width-5 runs of the up24x25 WASM SIMD mul16 kernel with an explicit output
// stride of 127, for channel counts 1, 24, 47, 70, 93 and 116.
//
// channels() receives the loop variable so that each iteration exercises a
// different channel count; a constant here would make the loop repeat a
// single configuration. The largest swept count (116) is below the output
// stride of 127 (presumably the stride must be at least the channel count;
// confirm against DWConvMicrokernelTester).
//
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or regeneration will revert it.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
  }
}
11827
// Width-3 runs of the up24x25 WASM SIMD mul16 kernel with qmin set to 128,
// for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels).width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11839
// Width-3 runs of the up24x25 WASM SIMD mul16 kernel with qmax set to 128,
// for channel counts 1, 24, 47, 70, 93 and 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels).width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11851
// Width-3 runs of the up24x25 WASM SIMD mul16 kernel with the input zero
// point at 255 and the kernel zero point at 0, for channels 1, 24, ..., 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, input_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11864
// Width-3 runs of the up24x25 WASM SIMD mul16 kernel with the input zero
// point at 0 and the kernel zero point at 255, for channels 1, 24, ..., 116.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, kernel_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 120; num_channels += 23) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11877
// Runs the up24x25 WASM SIMD mul16 kernel with an input offset of 464 for
// channel counts 48, 120, 192, 264 and 336.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, input_offset) {
  for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(24).kr(25);
    tester.channels(num_channels);
    tester.input_offset(464);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
        xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
        xnn_qu8_requantize_fp32);
  }
}
11888
// For every zero_index 0..24, runs the up24x25 WASM SIMD mul16 kernel with an
// input offset of 464 for channel counts 48, 120, 192, 264 and 336.
TEST(QU8_DWCONV_MINMAX_FP32_UP24X25__WASMSIMD_MUL16, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t num_channels = 48; num_channels < 384; num_channels += 72) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(24).kr(25);
      tester.channels(num_channels);
      tester.input_offset(464).zero_index(zero_idx);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up24x25__wasmsimd_mul16,
          xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11902 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11903
11904
11905 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the up1x9 WASM fmagic kernel with exactly 1 channel
// (channels equal to cr).
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_eq_1) {
  auto tester = DWConvMicrokernelTester();
  tester.cr(1).kr(9);
  tester.channels(1);
  tester.Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
11913
// Runs the up1x9 WASM fmagic kernel for every channel count from 2 to 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1) {
  for (uint32_t num_channels = 2; num_channels < 10; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9);
    tester.channels(num_channels);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
11923
// Runs the up1x9 WASM fmagic kernel with qmin set to 128 for every channel
// count from 2 to 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t num_channels = 2; num_channels < 10; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9);
    tester.channels(num_channels);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
11934
// Runs the up1x9 WASM fmagic kernel with qmax set to 128 for every channel
// count from 2 to 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t num_channels = 2; num_channels < 10; num_channels++) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9);
    tester.channels(num_channels);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
11945
// Width-3 runs of the up1x9 WASM fmagic kernel for channel counts 1 to 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9);
    tester.channels(num_channels).width(3);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
11956
// Width-3 runs of the up1x9 WASM fmagic kernel for channel counts 1 to 5,
// each combined with every step value from 2 to 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    for (size_t pixel_step = 2; pixel_step <= 9; pixel_step++) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(1).kr(9);
      tester.channels(num_channels).width(3);
      tester.step(pixel_step);
      tester.Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
11970
// Width-5 runs of the up1x9 WASM fmagic kernel with an explicit output stride
// of 7, for channel counts 1 to 5.
//
// channels() receives the loop variable so that each iteration exercises a
// different channel count; a constant here would make the loop repeat a
// single configuration. The largest swept count (5) is below the output
// stride of 7 (presumably the stride must be at least the channel count;
// confirm against DWConvMicrokernelTester).
//
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or regeneration will revert it.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
11982
// Width-3 runs of the up1x9 WASM fmagic kernel with qmin set to 128, for
// channel counts 1 to 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9);
    tester.channels(num_channels).width(3);
    tester.qmin(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
11994
// Width-3 runs of the up1x9 WASM fmagic kernel with qmax set to 128, for
// channel counts 1 to 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9);
    tester.channels(num_channels).width(3);
    tester.qmax(128);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12006
// Width-3 runs of the up1x9 WASM fmagic kernel with the input zero point at
// 255 and the kernel zero point at 0, for channel counts 1 to 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, input_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9);
    tester.channels(num_channels).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12019
// Width-3 runs of the up1x9 WASM fmagic kernel with the input zero point at
// 0 and the kernel zero point at 255, for channel counts 1 to 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, kernel_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(1).kr(9);
    tester.channels(num_channels).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12032
// Runs the up1x9 kernel for channel counts 2, 5, 8, 11, 14 with input_offset set to 48.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12043
// Sweeps zero_index over 0..8 and, for each, channel counts 2, 5, 8, 11, 14
// with input_offset set to 48 on the up1x9 kernel.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12057 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12058
12059
12060 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the up1x25 kernel once with exactly 1 channel.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(25).channels(1)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
12068
// Runs the up1x25 kernel for every channel count from 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12078
// Runs the up1x25 kernel for channel counts 2 through 9 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12089
// Runs the up1x25 kernel for channel counts 2 through 9 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12100
// Runs the up1x25 kernel for 1 through 5 channels at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12111
// For 1 through 5 channels at width 3, sweeps step from 2 through 25 on the up1x25 kernel.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).step(s)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12125
// Sweeps 1 through 5 channels at width 5 with output_stride set to 7 on the up1x25 kernel.
// The loop variable is passed to channels() so each iteration covers a distinct channel
// count; channels() was previously fixed at 1, so every iteration ran the same configuration.
// The largest channel count in the sweep (5) stays below the output stride (7).
// NOTE(review): the file header says this file is generated by tools/generate-dwconv-test.py;
// the same change presumably needs to be mirrored in the generator template — confirm.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
12137
// Runs the up1x25 kernel for 1 through 5 channels at width 3 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12149
// Runs the up1x25 kernel for 1 through 5 channels at width 3 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12161
// Runs the up1x25 kernel for 1 through 5 channels at width 3 with
// input_zero_point = 255 and kernel_zero_point = 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12174
// Runs the up1x25 kernel for 1 through 5 channels at width 3 with
// input_zero_point = 0 and kernel_zero_point = 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12187
// Runs the up1x25 kernel for channel counts 2, 5, 8, 11, 14 with input_offset set to 48.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12198
// Sweeps zero_index over 0..24 and, for each, channel counts 2, 5, 8, 11, 14
// with input_offset set to 48 on the up1x25 kernel.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12212 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12213
12214
12215 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the up2x9 kernel once with exactly 2 channels.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
12223
// Runs the up2x9 kernel for channel counts 4, 10, 16, 22, 28.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12233
// Runs the up2x9 kernel for channel counts 4, 10, 16, 22, 28 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12244
// Runs the up2x9 kernel for channel counts 4, 10, 16, 22, 28 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12255
// Runs the up2x9 kernel for channel counts below 2 (the loop yields only 1).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12265
// Runs the up2x9 kernel for channel counts in [3, 4) (the loop yields only 3).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12275
// Runs the up2x9 kernel for channel counts in [3, 4) with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12286
// Runs the up2x9 kernel for channel counts in [3, 4) with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12297
// Runs the up2x9 kernel for 1 through 10 channels at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12308
// For 1 through 10 channels at width 3, sweeps step from 2 through 9 on the up2x9 kernel.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).step(s)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12322
// Sweeps 1 through 10 channels at width 5 with output_stride set to 13 on the up2x9 kernel.
// The loop variable is passed to channels() so each iteration covers a distinct channel
// count; channels() was previously fixed at 2, so every iteration ran the same configuration.
// The largest channel count in the sweep (10) stays below the output stride (13).
// NOTE(review): the file header says this file is generated by tools/generate-dwconv-test.py;
// the same change presumably needs to be mirrored in the generator template — confirm.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
12334
// Runs the up2x9 kernel for 1 through 10 channels at width 3 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12346
// Runs the up2x9 kernel for 1 through 10 channels at width 3 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12358
// Runs the up2x9 kernel for 1 through 10 channels at width 3 with
// input_zero_point = 255 and kernel_zero_point = 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12371
// Runs the up2x9 kernel for 1 through 10 channels at width 3 with
// input_zero_point = 0 and kernel_zero_point = 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12384
// Runs the up2x9 kernel for channel counts 4, 10, 16, 22, 28 with input_offset set to 80.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12395
// Sweeps zero_index over 0..8 and, for each, channel counts 4, 10, 16, 22, 28
// with input_offset set to 80 on the up2x9 kernel.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12409 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12410
12411
12412 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the up2x25 kernel once with exactly 2 channels.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(25).channels(2)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
12420
// Runs the up2x25 kernel for channel counts 4, 10, 16, 22, 28.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12430
// Runs the up2x25 kernel for channel counts 4, 10, 16, 22, 28 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12441
// Runs the up2x25 kernel for channel counts 4, 10, 16, 22, 28 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12452
// Runs the up2x25 kernel for channel counts below 2 (the loop yields only 1).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12462
// Runs the up2x25 kernel for channel counts in [3, 4) (the loop yields only 3).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12472
// Runs the up2x25 kernel for channel counts in [3, 4) with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12483
// Runs the up2x25 kernel for channel counts in [3, 4) with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12494
// Runs the up2x25 kernel for 1 through 10 channels at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12505
// For 1 through 10 channels at width 3, sweeps step from 2 through 25 on the up2x25 kernel.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).step(s)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12519
// Sweeps 1 through 10 channels at width 5 with output_stride set to 13 on the up2x25 kernel.
// The loop variable is passed to channels() so each iteration covers a distinct channel
// count; channels() was previously fixed at 2, so every iteration ran the same configuration.
// The largest channel count in the sweep (10) stays below the output stride (13).
// NOTE(review): the file header says this file is generated by tools/generate-dwconv-test.py;
// the same change presumably needs to be mirrored in the generator template — confirm.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
12531
// Runs the up2x25 kernel for 1 through 10 channels at width 3 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12543
// Runs the up2x25 kernel for 1 through 10 channels at width 3 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12555
// Runs the up2x25 kernel for 1 through 10 channels at width 3 with
// input_zero_point = 255 and kernel_zero_point = 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12568
// Runs the up2x25 kernel for 1 through 10 channels at width 3 with
// input_zero_point = 0 and kernel_zero_point = 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12581
// Runs the up2x25 kernel for channel counts 4, 10, 16, 22, 28 with input_offset set to 80.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(25).channels(c).input_offset(80)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12592
// Sweeps zero_index over 0..24 and, for each, channel counts 4, 10, 16, 22, 28
// with input_offset set to 80 on the up2x25 kernel.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(25).channels(c).input_offset(80).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12606 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12607
12608
12609 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the up4x9 kernel once with exactly 4 channels.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_eq_4) {
  DWConvMicrokernelTester().cr(4).kr(9).channels(4)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
12617
// Runs the up4x9 kernel for channel counts 8, 20, 32, 44, 56.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12627
// Runs the up4x9 kernel for channel counts 8, 20, 32, 44, 56 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12638
// Runs the up4x9 kernel for channel counts 8, 20, 32, 44, 56 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12649
// Runs the up4x9 kernel for channel counts 1, 2, and 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_lt_4) {
  for (uint32_t c = 1; c < 4; ++c) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12659
// Runs the up4x9 kernel for channel counts 5, 6, and 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12669
// Runs the up4x9 kernel for channel counts 5, 6, and 7 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12680
// Runs the up4x9 kernel for channel counts 5, 6, and 7 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12691
// Runs the up4x9 kernel at width 3 for channel counts 1, 4, 7, ..., 19.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12702
// For channel counts 1, 4, 7, ..., 19 at width 3, sweeps step from 2 through 9 on the up4x9 kernel.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(4).kr(9).channels(c).width(3).step(s)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12716
// Sweeps channel counts 1, 4, 7, ..., 19 at width 5 with output_stride set to 23 on the
// up4x9 kernel.
// The loop variable is passed to channels() so each iteration covers a distinct channel
// count; channels() was previously fixed at 4, so every iteration ran the same configuration.
// The largest channel count in the sweep (19) stays below the output stride (23).
// NOTE(review): the file header says this file is generated by tools/generate-dwconv-test.py;
// the same change presumably needs to be mirrored in the generator template — confirm.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
12728
// Runs the up4x9 kernel at width 3 for channel counts 1, 4, 7, ..., 19 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).width(3).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12740
// Runs the up4x9 kernel at width 3 for channel counts 1, 4, 7, ..., 19 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).width(3).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12752
// Runs the up4x9 kernel at width 3 for channel counts 1, 4, 7, ..., 19 with
// input_zero_point = 255 and kernel_zero_point = 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12765
// Runs the up4x9 kernel at width 3 for channel counts 1, 4, 7, ..., 19 with
// input_zero_point = 0 and kernel_zero_point = 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12778
// Runs the up4x9 kernel for channel counts 8, 20, 32, 44, 56 with input_offset set to 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(9).channels(c).input_offset(112)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12789
// Runs the up4x9 wasm_fmagic kernel with input_offset 112 for every
// zero_index in 0..8 combined with channel counts 8, 20, 32, 44 and 56.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      DWConvMicrokernelTester().cr(4).kr(9).channels(c)
        .input_offset(112).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12803 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12804
12805
12806 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the up4x25 wasm_fmagic kernel once with exactly 4 channels.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_eq_4) {
  DWConvMicrokernelTester().cr(4).kr(25).channels(4)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
12814
// Runs the up4x25 wasm_fmagic kernel for channel counts 8, 20, 32, 44 and 56
// (all multiples of 4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12824
// Runs the up4x25 wasm_fmagic kernel with qmin 128 for channel counts
// 8, 20, 32, 44 and 56.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12835
// Runs the up4x25 wasm_fmagic kernel with qmax 128 for channel counts
// 8, 20, 32, 44 and 56.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12846
// Runs the up4x25 wasm_fmagic kernel for channel counts 1, 2 and 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_lt_4) {
  for (uint32_t c = 1; c < 4; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12856
// Runs the up4x25 wasm_fmagic kernel for channel counts 5, 6 and 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12866
// Runs the up4x25 wasm_fmagic kernel with qmin 128 for channel counts
// 5, 6 and 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12877
// Runs the up4x25 wasm_fmagic kernel with qmax 128 for channel counts
// 5, 6 and 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12888
// Runs the up4x25 wasm_fmagic kernel with width 3 for channel counts
// 1, 4, ..., 19.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12899
// Runs the up4x25 wasm_fmagic kernel with width 3 for channel counts
// 1, 4, ..., 19 combined with every step value in 2..25.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester().cr(4).kr(25).channels(c).width(3).step(step_size)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
12913
// Runs the up4x25 wasm_fmagic kernel with width 5 and output_stride 23 for
// channel counts 1, 4, ..., 19.
//
// The loop variable is forwarded to channels(); with a hard-coded channels(4)
// every iteration would repeat one identical configuration. The largest
// channel count used (19) stays below the output stride of 23.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
12925
// Runs the up4x25 wasm_fmagic kernel with width 3 and qmin 128 for channel
// counts 1, 4, ..., 19.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).width(3).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12937
// Runs the up4x25 wasm_fmagic kernel with width 3 and qmax 128 for channel
// counts 1, 4, ..., 19.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).width(3).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12949
// Runs the up4x25 wasm_fmagic kernel with width 3, input_zero_point 255 and
// kernel_zero_point 0 for channel counts 1, 4, ..., 19.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12962
// Runs the up4x25 wasm_fmagic kernel with width 3, input_zero_point 0 and
// kernel_zero_point 255 for channel counts 1, 4, ..., 19.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12975
// Runs the up4x25 wasm_fmagic kernel with input_offset 112 for channel counts
// 8, 20, 32, 44 and 56.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, input_offset) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester().cr(4).kr(25).channels(c).input_offset(112)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
12986
// Runs the up4x25 wasm_fmagic kernel with input_offset 112 for every
// zero_index in 0..24 combined with channel counts 8, 20, 32, 44 and 56.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__WASM_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      DWConvMicrokernelTester().cr(4).kr(25).channels(c)
        .input_offset(112).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__wasm_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13000 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
13001
13002
// Runs the up1x9 scalar_fmagic kernel once with a single channel.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(9).channels(1)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
13010
// Runs the up1x9 scalar_fmagic kernel for channel counts 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13020
// Runs the up1x9 scalar_fmagic kernel with qmin 128 for channel counts
// 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13031
// Runs the up1x9 scalar_fmagic kernel with qmax 128 for channel counts
// 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13042
// Runs the up1x9 scalar_fmagic kernel with width 3 for channel counts
// 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13053
// Runs the up1x9 scalar_fmagic kernel with width 3 for channel counts
// 1 through 5 combined with every step value in 2..9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).step(step_size)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13067
// Runs the up1x9 scalar_fmagic kernel with width 5 and output_stride 7 for
// channel counts 1 through 5.
//
// The loop variable is forwarded to channels(); with a hard-coded channels(1)
// every iteration would repeat one identical configuration. The largest
// channel count used (5) stays below the output stride of 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13079
// Runs the up1x9 scalar_fmagic kernel with width 3 and qmin 128 for channel
// counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13091
// Runs the up1x9 scalar_fmagic kernel with width 3 and qmax 128 for channel
// counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13103
// Runs the up1x9 scalar_fmagic kernel with width 3, input_zero_point 255 and
// kernel_zero_point 0 for channel counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13116
// Runs the up1x9 scalar_fmagic kernel with width 3, input_zero_point 0 and
// kernel_zero_point 255 for channel counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13129
// Runs the up1x9 scalar_fmagic kernel with input_offset 48 for channel counts
// 2, 5, 8, 11 and 14.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13140
// Runs the up1x9 scalar_fmagic kernel with input_offset 48 for every
// zero_index in 0..8 combined with channel counts 2, 5, 8, 11 and 14.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c)
        .input_offset(48).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13154
// Runs the up1x9 scalar_imagic kernel once with a single channel.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(9).channels(1)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
13162
// Runs the up1x9 scalar_imagic kernel for channel counts 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13172
// Runs the up1x9 scalar_imagic kernel with qmin 128 for channel counts
// 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13183
// Runs the up1x9 scalar_imagic kernel with qmax 128 for channel counts
// 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13194
// Runs the up1x9 scalar_imagic kernel with width 3 for channel counts
// 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13205
// Runs the up1x9 scalar_imagic kernel with width 3 for channel counts
// 1 through 5 combined with every step value in 2..9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).step(step_size)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13219
// Runs the up1x9 scalar_imagic kernel with width 5 and output_stride 7 for
// channel counts 1 through 5.
//
// The loop variable is forwarded to channels(); with a hard-coded channels(1)
// every iteration would repeat one identical configuration. The largest
// channel count used (5) stays below the output stride of 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13231
// Runs the up1x9 scalar_imagic kernel with width 3 and qmin 128 for channel
// counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13243
// Runs the up1x9 scalar_imagic kernel with width 3 and qmax 128 for channel
// counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13255
// Runs the up1x9 scalar_imagic kernel with width 3, input_zero_point 255 and
// kernel_zero_point 0 for channel counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13268
// Runs the up1x9 scalar_imagic kernel with width 3, input_zero_point 0 and
// kernel_zero_point 255 for channel counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13281
// Runs the up1x9 scalar_imagic kernel with input_offset 48 for channel counts
// 2, 5, 8, 11 and 14.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13292
// Runs the up1x9 scalar_imagic kernel with input_offset 48 for every
// zero_index in 0..8 combined with channel counts 2, 5, 8, 11 and 14.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c)
        .input_offset(48).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13306
// Runs the up1x9 scalar_lrintf kernel once with a single channel.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(9).channels(1)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
13314
// Runs the up1x9 scalar_lrintf kernel for channel counts 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
13324
// Runs the up1x9 scalar_lrintf kernel with qmin 128 for channel counts
// 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
13335
// Runs the up1x9 scalar_lrintf kernel with qmax 128 for channel counts
// 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
13346
// Runs the up1x9 scalar_lrintf kernel with width 3 for channel counts
// 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
13357
// Runs the up1x9 scalar_lrintf kernel with width 3 for channel counts
// 1 through 5 combined with every step value in 2..9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t step_size = 2; step_size <= 9; ++step_size) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).step(step_size)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13371
// Runs the up1x9 scalar_lrintf kernel with width 5 and output_stride 7 for
// channel counts 1 through 5.
//
// The loop variable is forwarded to channels(); with a hard-coded channels(1)
// every iteration would repeat one identical configuration. The largest
// channel count used (5) stays below the output stride of 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13383
// Runs the up1x9 scalar_lrintf kernel with width 3 and qmin 128 for channel
// counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
13395
// Runs the up1x9 scalar_lrintf kernel with width 3 and qmax 128 for channel
// counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
13407
// Runs the up1x9 scalar_lrintf kernel with width 3, input_zero_point 255 and
// kernel_zero_point 0 for channel counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .input_zero_point(255).kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
13420
// Runs the up1x9 scalar_lrintf kernel with width 3, input_zero_point 0 and
// kernel_zero_point 255 for channel counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, kernel_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).width(3)
      .input_zero_point(0).kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
13433
// Runs the up1x9 scalar_lrintf kernel with input_offset 48 for channel counts
// 2, 5, 8, 11 and 14.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(9).channels(c).input_offset(48)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
13444
// Runs the up1x9 scalar_lrintf kernel with input_offset 48 for every
// zero_index in 0..8 combined with channel counts 2, 5, 8, 11 and 14.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X9__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(9).channels(c)
        .input_offset(48).zero_index(zero_idx)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13458
// Runs the up1x25 scalar_fmagic kernel once with a single channel.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(25).channels(1)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
13466
// Runs the up1x25 scalar_fmagic kernel for channel counts 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13476
// Runs the up1x25 scalar_fmagic kernel with qmin 128 for channel counts
// 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13487
// Runs the up1x25 scalar_fmagic kernel with qmax 128 for channel counts
// 2 through 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13498
// Runs the up1x25 scalar_fmagic kernel with width 3 for channel counts
// 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13509
// Runs the up1x25 scalar_fmagic kernel with width 3 for channel counts
// 1 through 5 combined with every step value in 2..25.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t step_size = 2; step_size <= 25; ++step_size) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).step(step_size)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
13523
// Runs the up1x25 scalar_fmagic kernel with width 5 and output_stride 7 for
// channel counts 1 through 5.
//
// The loop variable is forwarded to channels(); with a hard-coded channels(1)
// every iteration would repeat one identical configuration. The largest
// channel count used (5) stays below the output stride of 7.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13535
// Runs the up1x25 scalar_fmagic kernel with width 3 and qmin 128 for channel
// counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13547
// Runs the up1x25 scalar_fmagic kernel with width 3 and qmax 128 for channel
// counts 1 through 5.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
13559
// Sweeps channel counts 1..5 at width 3 with input_zero_point 255 and
// kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13572
// Sweeps channel counts 1..5 at width 3 with input_zero_point 0 and
// kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13585
// Runs channel counts 2, 5, 8, 11 and 14 with input_offset set to 48.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13596
// For every zero_index 0..24 and channel counts 2, 5, 8, 11 and 14, runs the
// kernel with input_offset set to 48.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
13610
// Runs the up1x25 imagic kernel once with cr = 1, kr = 25 and a single channel.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(25).channels(1)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
13618
// Sweeps channel counts 2..9 with cr = 1 and kr = 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13628
// Sweeps channel counts 2..9 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13639
// Sweeps channel counts 2..9 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13650
// Sweeps channel counts 1..5 at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13661
// For every channel count 1..5 and every step 2..25, runs the up1x25 imagic
// kernel at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).step(s)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
13675
// Sweeps channel counts 1..5 at width 5 with an output stride of 7.
// The loop variable is forwarded to channels() so that every iteration covers
// a distinct channel count (the stride of 7 exceeds the largest count, 5).
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13687
// Sweeps channel counts 1..5 at width 3 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13699
// Sweeps channel counts 1..5 at width 3 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13711
// Sweeps channel counts 1..5 at width 3 with input_zero_point 255 and
// kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13724
// Sweeps channel counts 1..5 at width 3 with input_zero_point 0 and
// kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13737
// Runs channel counts 2, 5, 8, 11 and 14 with input_offset set to 48.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
13748
// For every zero_index 0..24 and channel counts 2, 5, 8, 11 and 14, runs the
// kernel with input_offset set to 48.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
13762
// Runs the up1x25 lrintf kernel once with cr = 1, kr = 25 and a single channel.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_eq_1) {
  DWConvMicrokernelTester().cr(1).kr(25).channels(1)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
13770
// Sweeps channel counts 2..9 with cr = 1 and kr = 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13780
// Sweeps channel counts 2..9 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmin) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13791
// Sweeps channel counts 2..9 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, c_gt_1_with_qmax) {
  for (uint32_t c = 2; c < 10; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13802
// Sweeps channel counts 1..5 at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13813
// For every channel count 1..5 and every step 2..25, runs the up1x25 lrintf
// kernel at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c <= 5; ++c) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).step(s)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
13827
// Sweeps channel counts 1..5 at width 5 with an output stride of 7.
// The loop variable is forwarded to channels() so that every iteration covers
// a distinct channel count (the stride of 7 exceeds the largest count, 5).
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 5; channels += 1) {
    DWConvMicrokernelTester()
      .cr(1)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(7)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13839
// Sweeps channel counts 1..5 at width 3 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13851
// Sweeps channel counts 1..5 at width 3 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13863
// Sweeps channel counts 1..5 at width 3 with input_zero_point 255 and
// kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13876
// Sweeps channel counts 1..5 at width 3 with input_zero_point 0 and
// kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, kernel_zero_point_only) {
  for (size_t c = 1; c <= 5; ++c) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13889
// Runs channel counts 2, 5, 8, 11 and 14 with input_offset set to 48.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 2; c < 16; c += 3) {
    DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
13900
// For every zero_index 0..24 and channel counts 2, 5, 8, 11 and 14, runs the
// kernel with input_offset set to 48.
TEST(QU8_DWCONV_MINMAX_FP32_UP1X25__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 2; c < 16; c += 3) {
      DWConvMicrokernelTester().cr(1).kr(25).channels(c).input_offset(48).zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up1x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
13914
// Runs the up2x9 fmagic kernel once with cr = 2, kr = 9 and two channels.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
13922
// Runs the even channel counts 4, 10, 16, 22 and 28 with cr = 2 and kr = 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13932
// Runs channel counts 4, 10, 16, 22 and 28 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13943
// Runs channel counts 4, 10, 16, 22 and 28 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13954
// Covers channel counts below cr = 2; the loop range [1, 2) yields only 1.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13964
// Covers channel counts in [3, 4), i.e. only 3, with cr = 2 and kr = 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13974
// Covers channel counts in [3, 4), i.e. only 3, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13985
// Covers channel counts in [3, 4), i.e. only 3, with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
13996
// Sweeps channel counts 1..10 at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
14007
// For every channel count 1..10 and every step 2..9, runs the up2x9 fmagic
// kernel at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).step(s)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
14021
// Sweeps channel counts 1..10 at width 5 with an output stride of 13.
// The loop variable is forwarded to channels() so that every iteration covers
// a distinct channel count (the stride of 13 exceeds the largest count, 10).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
14033
// Sweeps channel counts 1..10 at width 3 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
14045
// Sweeps channel counts 1..10 at width 3 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
14057
// Sweeps channel counts 1..10 at width 3 with input_zero_point 255 and
// kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
14070
// Sweeps channel counts 1..10 at width 3 with input_zero_point 0 and
// kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
14083
// Runs channel counts 4, 10, 16, 22 and 28 with input_offset set to 80.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
14094
// For every zero_index 0..8 and channel counts 4, 10, 16, 22 and 28, runs the
// kernel with input_offset set to 80.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
14108
// Runs the up2x9 imagic kernel once with cr = 2, kr = 9 and two channels.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
14116
// Runs the even channel counts 4, 10, 16, 22 and 28 with cr = 2 and kr = 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14126
// Runs channel counts 4, 10, 16, 22 and 28 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmin) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14137
// Runs channel counts 4, 10, 16, 22 and 28 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_div_2_with_qmax) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14148
// Covers channel counts below cr = 2; the loop range [1, 2) yields only 1.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_lt_2) {
  for (uint32_t c = 1; c < 2; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14158
// Covers channel counts in [3, 4), i.e. only 3, with cr = 2 and kr = 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14168
// Covers channel counts in [3, 4), i.e. only 3, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmin) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14179
// Covers channel counts in [3, 4), i.e. only 3, with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, c_gt_2_with_qmax) {
  for (uint32_t c = 3; c < 4; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14190
// Sweeps channel counts 1..10 at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14201
// For every channel count 1..10 and every step 2..9, runs the up2x9 imagic
// kernel at width 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 10; ++c) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).step(s)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
14215
// Sweeps channel counts 1..10 at width 5 with an output stride of 13.
// The loop variable is forwarded to channels() so that every iteration covers
// a distinct channel count (the stride of 13 exceeds the largest count, 10).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14227
// Sweeps channel counts 1..10 at width 3 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14239
// Sweeps channel counts 1..10 at width 3 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3).qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14251
// Sweeps channel counts 1..10 at width 3 with input_zero_point 255 and
// kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3)
        .input_zero_point(255).kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14264
// Sweeps channel counts 1..10 at width 3 with input_zero_point 0 and
// kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 10; ++c) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).width(3)
        .input_zero_point(0).kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14277
// Runs channel counts 4, 10, 16, 22 and 28 with input_offset set to 80.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 4; c < 32; c += 6) {
    DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14288
// For every zero_index 0..8 and channel counts 4, 10, 16, 22 and 28, runs the
// kernel with input_offset set to 80.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester().cr(2).kr(9).channels(c).input_offset(80).zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
14302
// Runs the up2x9 lrintf kernel once with cr = 2, kr = 9 and two channels.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_eq_2) {
  DWConvMicrokernelTester().cr(2).kr(9).channels(2)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
14310
// up2x9 scalar_lrintf: channels 4, 10, 16, 22, 28 (all even).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14320
// up2x9 scalar_lrintf: channels 4, 10, 16, 22, 28 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmin) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14331
// up2x9 scalar_lrintf: channels 4, 10, 16, 22, 28 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_div_2_with_qmax) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14342
// up2x9 scalar_lrintf: channel counts below 2; the loop bounds yield only channels == 1.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_lt_2) {
  for (uint32_t ch = 1; ch < 2; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14352
// up2x9 scalar_lrintf: channel counts in [3, 4); the loop bounds yield only channels == 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14362
// up2x9 scalar_lrintf: channels == 3 (only value in [3, 4)) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmin) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14373
// up2x9 scalar_lrintf: channels == 3 (only value in [3, 4)) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, c_gt_2_with_qmax) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14384
// up2x9 scalar_lrintf: channels 1..10 at width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14395
// up2x9 scalar_lrintf: channels 1..10 crossed with step 2..9, at width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    for (size_t stp = 2; stp <= 9; ++stp) {
      DWConvMicrokernelTester()
        .cr(2).kr(9)
        .channels(ch)
        .width(3)
        .step(stp)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14409
// up2x9 scalar_lrintf: channels 1..10 at width(5) with output_stride(13).
// The loop variable is now forwarded to channels(); previously channels(2) was hard-coded, so
// all ten iterations ran the same configuration. output_stride 13 exceeds the largest channel
// count tested (10).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; mirror the change there.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
14421
// up2x9 scalar_lrintf: channels 1..10 at width(3) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14433
// up2x9 scalar_lrintf: channels 1..10 at width(3) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14445
// up2x9 scalar_lrintf: channels 1..10 at width 3 with input_zero_point(255) and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_zero_point_only) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14458
// up2x9 scalar_lrintf: channels 1..10 at width 3 with input_zero_point(0) and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, kernel_zero_point_only) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14471
// up2x9 scalar_lrintf: channels 4, 10, 16, 22, 28 with input_offset(80).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(9)
      .channels(ch)
      .input_offset(80)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14482
// up2x9 scalar_lrintf: for each zero_index 0..8, channels 4, 10, 16, 22, 28 with input_offset(80).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X9__SCALAR_LRINTF, zero) {
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t ch = 4; ch < 32; ch += 6) {
      DWConvMicrokernelTester()
        .cr(2).kr(9)
        .channels(ch)
        .input_offset(80)
        .zero_index(zi)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14496
// up2x25 scalar_fmagic: single run with channels(2); no other tester options are set.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_eq_2) {
  DWConvMicrokernelTester()
    .cr(2).kr(25)
    .channels(2)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qu8_requantize_fp32);
}
14504
// up2x25 scalar_fmagic: channels 4, 10, 16, 22, 28 (all even).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14514
// up2x25 scalar_fmagic: channels 4, 10, 16, 22, 28 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmin) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14525
// up2x25 scalar_fmagic: channels 4, 10, 16, 22, 28 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_div_2_with_qmax) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14536
// up2x25 scalar_fmagic: channel counts below 2; the loop bounds yield only channels == 1.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_lt_2) {
  for (uint32_t ch = 1; ch < 2; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14546
// up2x25 scalar_fmagic: channel counts in [3, 4); the loop bounds yield only channels == 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14556
// up2x25 scalar_fmagic: channels == 3 (only value in [3, 4)) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmin) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14567
// up2x25 scalar_fmagic: channels == 3 (only value in [3, 4)) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, c_gt_2_with_qmax) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14578
// up2x25 scalar_fmagic: channels 1..10 at width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14589
// up2x25 scalar_fmagic: channels 1..10 crossed with step 2..25, at width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    for (size_t stp = 2; stp <= 25; ++stp) {
      DWConvMicrokernelTester()
        .cr(2).kr(25)
        .channels(ch)
        .width(3)
        .step(stp)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14603
// up2x25 scalar_fmagic: channels 1..10 at width(5) with output_stride(13).
// The loop variable is now forwarded to channels(); previously channels(2) was hard-coded, so
// all ten iterations ran the same configuration. output_stride 13 exceeds the largest channel
// count tested (10).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; mirror the change there.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
14615
// up2x25 scalar_fmagic: channels 1..10 at width(3) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14627
// up2x25 scalar_fmagic: channels 1..10 at width(3) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14639
// up2x25 scalar_fmagic: channels 1..10 at width 3 with input_zero_point(255) and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, input_zero_point_only) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14652
// up2x25 scalar_fmagic: channels 1..10 at width 3 with input_zero_point(0) and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, kernel_zero_point_only) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14665
// up2x25 scalar_fmagic: channels 4, 10, 16, 22, 28 with input_offset(80).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .input_offset(80)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14676
// up2x25 scalar_fmagic: for each zero_index 0..24, channels 4, 10, 16, 22, 28 with input_offset(80).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t ch = 4; ch < 32; ch += 6) {
      DWConvMicrokernelTester()
        .cr(2).kr(25)
        .channels(ch)
        .input_offset(80)
        .zero_index(zi)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_fmagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14690
// up2x25 scalar_imagic: single run with channels(2); no other tester options are set.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_eq_2) {
  DWConvMicrokernelTester()
    .cr(2).kr(25)
    .channels(2)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
      xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qu8_requantize_fp32);
}
14698
// up2x25 scalar_imagic: channels 4, 10, 16, 22, 28 (all even).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14708
// up2x25 scalar_imagic: channels 4, 10, 16, 22, 28 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmin) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14719
// up2x25 scalar_imagic: channels 4, 10, 16, 22, 28 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_div_2_with_qmax) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14730
// up2x25 scalar_imagic: channel counts below 2; the loop bounds yield only channels == 1.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_lt_2) {
  for (uint32_t ch = 1; ch < 2; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14740
// up2x25 scalar_imagic: channel counts in [3, 4); the loop bounds yield only channels == 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14750
// up2x25 scalar_imagic: channels == 3 (only value in [3, 4)) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmin) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14761
// up2x25 scalar_imagic: channels == 3 (only value in [3, 4)) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, c_gt_2_with_qmax) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14772
// up2x25 scalar_imagic: channels 1..10 at width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14783
// up2x25 scalar_imagic: channels 1..10 crossed with step 2..25, at width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    for (size_t stp = 2; stp <= 25; ++stp) {
      DWConvMicrokernelTester()
        .cr(2).kr(25)
        .channels(ch)
        .width(3)
        .step(stp)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14797
// up2x25 scalar_imagic: channels 1..10 at width(5) with output_stride(13).
// The loop variable is now forwarded to channels(); previously channels(2) was hard-coded, so
// all ten iterations ran the same configuration. output_stride 13 exceeds the largest channel
// count tested (10).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; mirror the change there.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
14809
// up2x25 scalar_imagic: channels 1..10 at width(3) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14821
// up2x25 scalar_imagic: channels 1..10 at width(3) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14833
// up2x25 scalar_imagic: channels 1..10 at width 3 with input_zero_point(255) and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_zero_point_only) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14846
// up2x25 scalar_imagic: channels 1..10 at width 3 with input_zero_point(0) and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, kernel_zero_point_only) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14859
// up2x25 scalar_imagic: channels 4, 10, 16, 22, 28 with input_offset(80).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .input_offset(80)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
        xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qu8_requantize_fp32);
  }
}
14870
// up2x25 scalar_imagic: for each zero_index 0..24, channels 4, 10, 16, 22, 28 with input_offset(80).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t ch = 4; ch < 32; ch += 6) {
      DWConvMicrokernelTester()
        .cr(2).kr(25)
        .channels(ch)
        .input_offset(80)
        .zero_index(zi)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_imagic,
          xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14884
// up2x25 scalar_lrintf: single run with channels(2); no other tester options are set.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_eq_2) {
  DWConvMicrokernelTester()
    .cr(2).kr(25)
    .channels(2)
    .Test(
      xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
      xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qu8_requantize_fp32);
}
14892
// up2x25 scalar_lrintf: channels 4, 10, 16, 22, 28 (all even).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14902
// up2x25 scalar_lrintf: channels 4, 10, 16, 22, 28 with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmin) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14913
// up2x25 scalar_lrintf: channels 4, 10, 16, 22, 28 with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_div_2_with_qmax) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14924
// up2x25 scalar_lrintf: channel counts below 2; the loop bounds yield only channels == 1.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_lt_2) {
  for (uint32_t ch = 1; ch < 2; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14934
// up2x25 scalar_lrintf: channel counts in [3, 4); the loop bounds yield only channels == 3.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14944
// up2x25 scalar_lrintf: channels == 3 (only value in [3, 4)) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmin) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14955
// up2x25 scalar_lrintf: channels == 3 (only value in [3, 4)) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, c_gt_2_with_qmax) {
  for (uint32_t ch = 3; ch < 4; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14966
// up2x25 scalar_lrintf: channels 1..10 at width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
14977
// up2x25 scalar_lrintf: channels 1..10 crossed with step 2..25, at width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    for (size_t stp = 2; stp <= 25; ++stp) {
      DWConvMicrokernelTester()
        .cr(2).kr(25)
        .channels(ch)
        .width(3)
        .step(stp)
        .Test(
          xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
          xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qu8_requantize_fp32);
    }
  }
}
14991
// up2x25 scalar_lrintf: channels 1..10 at width(5) with output_stride(13).
// The loop variable is now forwarded to channels(); previously channels(2) was hard-coded, so
// all ten iterations ran the same configuration. output_stride 13 exceeds the largest channel
// count tested (10).
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; mirror the change there.
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 10; channels += 1) {
    DWConvMicrokernelTester()
      .cr(2)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(13)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15003
// up2x25 scalar_lrintf: channels 1..10 at width(3) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
15015
// up2x25 scalar_lrintf: channels 1..10 at width(3) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
15027
// up2x25 scalar_lrintf: channels 1..10 at width 3 with input_zero_point(255) and kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_zero_point_only) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
15040
// up2x25 scalar_lrintf: channels 1..10 at width 3 with input_zero_point(0) and kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, kernel_zero_point_only) {
  for (size_t ch = 1; ch <= 10; ++ch) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
15053
// up2x25 scalar_lrintf: channels 4, 10, 16, 22, 28 with input_offset(80).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, input_offset) {
  for (uint32_t ch = 4; ch < 32; ch += 6) {
    DWConvMicrokernelTester()
      .cr(2).kr(25)
      .channels(ch)
      .input_offset(80)
      .Test(
        xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf,
        xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qu8_requantize_fp32);
  }
}
15064
// For each zero_index 0..24 (kr = 25), runs channel counts 4, 10, 16, 22 and
// 28 with input_offset(80).
TEST(QU8_DWCONV_MINMAX_FP32_UP2X25__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t c = 4; c < 32; c += 6) {
      DWConvMicrokernelTester tester;
      tester.cr(2).kr(25).channels(c).input_offset(80).zero_index(zero_idx);
      tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up2x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
15078
// Single run with the channel count equal to cr (4) and kr = 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_eq_4) {
  DWConvMicrokernelTester tester;
  tester.cr(4).kr(9).channels(4);
  tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
15086
// Channel counts 8, 20, 32, 44 and 56 -- each a multiple of cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15096
// Channel counts 8, 20, 32, 44 and 56 (multiples of cr = 4) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15107
// Channel counts 8, 20, 32, 44 and 56 (multiples of cr = 4) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15118
// Channel counts 1..3, i.e. fewer than cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_lt_4) {
  for (uint32_t c = 1; c < 4; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15128
// Channel counts 5..7, i.e. above cr (4) and below 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15138
// Channel counts 5..7 (above cr = 4) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15149
// Channel counts 5..7 (above cr = 4) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15160
// Channel counts 1, 4, 7, ..., 19 with width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15171
// Channel counts 1, 4, 7, ..., 19 crossed with step values 2..9, width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(9).channels(c).width(3).step(step_value);
      tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
15185
// Channel counts 1, 4, 7, ..., 19 with width(5) and output_stride(23).
// The loop variable is passed to channels() so that every iteration covers a
// different channel count; a hard-coded channels(4) would repeat one identical
// configuration seven times. The largest count (19) stays below the output
// stride (23).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15197
// Channel counts 1, 4, 7, ..., 19 with width(3) and qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15209
// Channel counts 1, 4, 7, ..., 19 with width(3) and qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15221
// Channel counts 1, 4, 7, ..., 19 with width(3); input zero point is 255
// while the kernel zero point is 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15234
// Channel counts 1, 4, 7, ..., 19 with width(3); kernel zero point is 255
// while the input zero point is 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15247
// Channel counts 8, 20, 32, 44 and 56 with input_offset(112).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, input_offset) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).input_offset(112);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15258
// For each zero_index 0..8 (kr = 9), runs channel counts 8, 20, 32, 44 and 56
// with input_offset(112).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(9).channels(c).input_offset(112).zero_index(zero_idx);
      tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
15272
// Single run with the channel count equal to cr (4) and kr = 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_eq_4) {
  DWConvMicrokernelTester tester;
  tester.cr(4).kr(9).channels(4);
  tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
}
15280
// Channel counts 8, 20, 32, 44 and 56 -- each a multiple of cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15290
// Channel counts 8, 20, 32, 44 and 56 (multiples of cr = 4) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmin) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15301
// Channel counts 8, 20, 32, 44 and 56 (multiples of cr = 4) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_div_4_with_qmax) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15312
// Channel counts 1..3, i.e. fewer than cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_lt_4) {
  for (uint32_t c = 1; c < 4; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15322
// Channel counts 5..7, i.e. above cr (4) and below 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15332
// Channel counts 5..7 (above cr = 4) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmin) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15343
// Channel counts 5..7 (above cr = 4) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, c_gt_4_with_qmax) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15354
// Channel counts 1, 4, 7, ..., 19 with width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15365
// Channel counts 1, 4, 7, ..., 19 crossed with step values 2..9, width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(9).channels(c).width(3).step(step_value);
      tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
15379
// Channel counts 1, 4, 7, ..., 19 with width(5) and output_stride(23).
// The loop variable is passed to channels() so that every iteration covers a
// different channel count; a hard-coded channels(4) would repeat one identical
// configuration seven times. The largest count (19) stays below the output
// stride (23).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15391
// Channel counts 1, 4, 7, ..., 19 with width(3) and qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15403
// Channel counts 1, 4, 7, ..., 19 with width(3) and qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15415
// Channel counts 1, 4, 7, ..., 19 with width(3); input zero point is 255
// while the kernel zero point is 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15428
// Channel counts 1, 4, 7, ..., 19 with width(3); kernel zero point is 255
// while the input zero point is 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, kernel_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15441
// Channel counts 8, 20, 32, 44 and 56 with input_offset(112).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, input_offset) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).input_offset(112);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15452
// For each zero_index 0..8 (kr = 9), runs channel counts 8, 20, 32, 44 and 56
// with input_offset(112).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(9).channels(c).input_offset(112).zero_index(zero_idx);
      tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
15466
// Single run with the channel count equal to cr (4) and kr = 9.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_eq_4) {
  DWConvMicrokernelTester tester;
  tester.cr(4).kr(9).channels(4);
  tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
}
15474
// Channel counts 8, 20, 32, 44 and 56 -- each a multiple of cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15484
// Channel counts 8, 20, 32, 44 and 56 (multiples of cr = 4) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmin) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15495
// Channel counts 8, 20, 32, 44 and 56 (multiples of cr = 4) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_div_4_with_qmax) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15506
// Channel counts 1..3, i.e. fewer than cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_lt_4) {
  for (uint32_t c = 1; c < 4; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15516
// Channel counts 5..7, i.e. above cr (4) and below 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15526
// Channel counts 5..7 (above cr = 4) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmin) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15537
// Channel counts 5..7 (above cr = 4) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, c_gt_4_with_qmax) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15548
// Channel counts 1, 4, 7, ..., 19 with width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15559
// Channel counts 1, 4, 7, ..., 19 crossed with step values 2..9, width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(9).channels(c).width(3).step(step_value);
      tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
15573
// Channel counts 1, 4, 7, ..., 19 with width(5) and output_stride(23).
// The loop variable is passed to channels() so that every iteration covers a
// different channel count; a hard-coded channels(4) would repeat one identical
// configuration seven times. The largest count (19) stays below the output
// stride (23).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15585
// Channel counts 1, 4, 7, ..., 19 with width(3) and qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15597
// Channel counts 1, 4, 7, ..., 19 with width(3) and qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15609
// Channel counts 1, 4, 7, ..., 19 with width(3); input zero point is 255
// while the kernel zero point is 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15622
// Channel counts 1, 4, 7, ..., 19 with width(3); kernel zero point is 255
// while the input zero point is 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, kernel_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).width(3);
    tester.input_zero_point(0).kernel_zero_point(255);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15635
// Channel counts 8, 20, 32, 44 and 56 with input_offset(112).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, input_offset) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(9).channels(c).input_offset(112);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
15646
// For each zero_index 0..8 (kr = 9), runs channel counts 8, 20, 32, 44 and 56
// with input_offset(112).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X9__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t c = 8; c < 64; c += 12) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(9).channels(c).input_offset(112).zero_index(zero_idx);
      tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
    }
  }
}
15660
// Single run with the channel count equal to cr (4) and kr = 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_eq_4) {
  DWConvMicrokernelTester tester;
  tester.cr(4).kr(25).channels(4);
  tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
}
15668
// Channel counts 8, 20, 32, 44 and 56 -- each a multiple of cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15678
// Channel counts 8, 20, 32, 44 and 56 (multiples of cr = 4) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmin) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15689
// Channel counts 8, 20, 32, 44 and 56 (multiples of cr = 4) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_div_4_with_qmax) {
  for (uint32_t c = 8; c < 64; c += 12) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15700
// Channel counts 1..3, i.e. fewer than cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_lt_4) {
  for (uint32_t c = 1; c < 4; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15710
// Channel counts 5..7, i.e. above cr (4) and below 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15720
// Channel counts 5..7 (above cr = 4) with qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmin) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15731
// Channel counts 5..7 (above cr = 4) with qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, c_gt_4_with_qmax) {
  for (uint32_t c = 5; c < 8; ++c) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15742
// Channel counts 1, 4, 7, ..., 19 with width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c).width(3);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15753
// Channel counts 1, 4, 7, ..., 19 crossed with step values 2..25, width(3).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_step) {
  for (size_t c = 1; c <= 20; c += 3) {
    for (size_t step_value = 2; step_value <= 25; ++step_value) {
      DWConvMicrokernelTester tester;
      tester.cr(4).kr(25).channels(c).width(3).step(step_value);
      tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
    }
  }
}
15767
// Channel counts 1, 4, 7, ..., 19 with width(5) and output_stride(23).
// The loop variable is passed to channels() so that every iteration covers a
// different channel count; a hard-coded channels(4) would repeat one identical
// configuration seven times. The largest count (19) stays below the output
// stride (23).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15779
// Channel counts 1, 4, 7, ..., 19 with width(3) and qmin(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmin) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15791
// Channel counts 1, 4, 7, ..., 19 with width(3) and qmax(128).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, multipixel_with_qmax) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15803
// Channel counts 1, 4, 7, ..., 19 with width(3); input zero point is 255
// while the kernel zero point is 0.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, input_zero_point_only) {
  for (size_t c = 1; c <= 20; c += 3) {
    DWConvMicrokernelTester tester;
    tester.cr(4).kr(25).channels(c).width(3);
    tester.input_zero_point(255).kernel_zero_point(0);
    tester.Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
  }
}
15816
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19 where only the
// kernel zero point is non-zero (input 0, kernel 255).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, kernel_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .input_zero_point(0)
        .kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15829
// Channel counts 8, 20, 32, 44, 56 with the tester's input_offset set to 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, input_offset) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .input_offset(112)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15840
// For every zero_index in 0..24 (one per value below kr = 25), runs channel
// counts 8, 20, 32, 44, 56 with input_offset 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_FMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
      DWConvMicrokernelTester()
          .cr(4).kr(25)
          .channels(num_channels)
          .input_offset(112)
          .zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_fmagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15854
// Single run with the channel count equal to cr (4) and kr = 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_eq_4) {
  DWConvMicrokernelTester()
      .cr(4).kr(25)
      .channels(4)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
            xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qu8_requantize_fp32);
}
15862
// Channel counts 8, 20, 32, 44, 56 — all multiples of cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15872
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with the tester's
// qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmin) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15883
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with the tester's
// qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_div_4_with_qmax) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15894
// Channel counts 1, 2 and 3 — every count below cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_lt_4) {
  for (uint32_t num_channels = 1; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15904
// Channel counts 5, 6 and 7 — strictly between cr (4) and 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15914
// Channel counts 5, 6 and 7 with the tester's qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmin) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15925
// Channel counts 5, 6 and 7 with the tester's qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, c_gt_4_with_qmax) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15936
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15947
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19, each repeated
// for every tester step value from 2 through 25 inclusive.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester()
          .cr(4).kr(25)
          .channels(num_channels)
          .width(3)
          .step(pixel_step)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
15961
// Runs the scalar-imagic up4x25 kernel over 5 output pixels with an output
// stride of 23 for channel counts 1, 4, 7, ..., 19.
//
// The swept channel count is now actually passed to the tester. Previously the
// body hard-coded channels(4), so the loop re-ran one identical configuration
// seven times and the loop variable was dead. Every swept value stays below
// the output stride of 23.
// NOTE: this file is generated; the same correction belongs in
// tools/generate-dwconv-test.py so that it survives regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
  }
}
15973
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19 with the tester's
// qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15985
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19 with the tester's
// qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
15997
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19 where only the
// input zero point is non-zero (input 255, kernel 0).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .input_zero_point(255)
        .kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
16010
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19 where only the
// kernel zero point is non-zero (input 0, kernel 255).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, kernel_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .input_zero_point(0)
        .kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
16023
// Channel counts 8, 20, 32, 44, 56 with the tester's input_offset set to 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, input_offset) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .input_offset(112)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
              xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qu8_requantize_fp32);
  }
}
16034
// For every zero_index in 0..24 (one per value below kr = 25), runs channel
// counts 8, 20, 32, 44, 56 with input_offset 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_IMAGIC, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
      DWConvMicrokernelTester()
          .cr(4).kr(25)
          .channels(num_channels)
          .input_offset(112)
          .zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_imagic,
                xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16048
// Single run with the channel count equal to cr (4) and kr = 25.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_eq_4) {
  DWConvMicrokernelTester()
      .cr(4).kr(25)
      .channels(4)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
            xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qu8_requantize_fp32);
}
16056
// Channel counts 8, 20, 32, 44, 56 — all multiples of cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16066
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with the tester's
// qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmin) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16077
// Channel counts 8, 20, 32, 44, 56 (multiples of cr = 4) with the tester's
// qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_div_4_with_qmax) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16088
// Channel counts 1, 2 and 3 — every count below cr (4).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_lt_4) {
  for (uint32_t num_channels = 1; num_channels < 4; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16098
// Channel counts 5, 6 and 7 — strictly between cr (4) and 2 * cr.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16108
// Channel counts 5, 6 and 7 with the tester's qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmin) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16119
// Channel counts 5, 6 and 7 with the tester's qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, c_gt_4_with_qmax) {
  for (uint32_t num_channels = 5; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16130
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16141
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19, each repeated
// for every tester step value from 2 through 25 inclusive.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_step) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    for (size_t pixel_step = 2; pixel_step <= 25; ++pixel_step) {
      DWConvMicrokernelTester()
          .cr(4).kr(25)
          .channels(num_channels)
          .width(3)
          .step(pixel_step)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}
16155
// Runs the scalar-lrintf up4x25 kernel over 5 output pixels with an output
// stride of 23 for channel counts 1, 4, 7, ..., 19.
//
// The swept channel count is now actually passed to the tester. Previously the
// body hard-coded channels(4), so the loop re-ran one identical configuration
// seven times and the loop variable was dead. Every swept value stays below
// the output stride of 23.
// NOTE: this file is generated; the same correction belongs in
// tools/generate-dwconv-test.py so that it survives regeneration.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_output_stride) {
  for (size_t channels = 1; channels <= 20; channels += 3) {
    DWConvMicrokernelTester()
      .cr(4)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(23)
      .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
  }
}
16167
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19 with the tester's
// qmin set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmin) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .qmin(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16179
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19 with the tester's
// qmax set to 128.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, multipixel_with_qmax) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .qmax(128)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16191
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19 where only the
// input zero point is non-zero (input 255, kernel 0).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .input_zero_point(255)
        .kernel_zero_point(0)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16204
// Three-pixel-wide runs for channel counts 1, 4, 7, ..., 19 where only the
// kernel zero point is non-zero (input 0, kernel 255).
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, kernel_zero_point_only) {
  for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .width(3)
        .input_zero_point(0)
        .kernel_zero_point(255)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16217
// Channel counts 8, 20, 32, 44, 56 with the tester's input_offset set to 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, input_offset) {
  for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
    DWConvMicrokernelTester()
        .cr(4).kr(25)
        .channels(num_channels)
        .input_offset(112)
        .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
              xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qu8_requantize_fp32);
  }
}
16228
// For every zero_index in 0..24 (one per value below kr = 25), runs channel
// counts 8, 20, 32, 44, 56 with input_offset 112.
TEST(QU8_DWCONV_MINMAX_FP32_UP4X25__SCALAR_LRINTF, zero) {
  for (uint32_t zero_idx = 0; zero_idx < 25; ++zero_idx) {
    for (uint32_t num_channels = 8; num_channels < 64; num_channels += 12) {
      DWConvMicrokernelTester()
          .cr(4).kr(25)
          .channels(num_channels)
          .input_offset(112)
          .zero_index(zero_idx)
          .Test(xnn_qu8_dwconv_minmax_fp32_ukernel_up4x25__scalar_lrintf,
                xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qu8_requantize_fp32);
    }
  }
}