• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 //   Specification: test/qs8-dwconv-minmax-rndnu.yaml
11 //   Generator: tools/generate-dwconv-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18 
19 #include <xnnpack/dwconv.h>
20 #include "dwconv-microkernel-tester.h"
21 
22 
23 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
    xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
32 
// Channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
43 
// Channel counts 16, 40, 64, 88, 112 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
55 
// Channel counts 16, 40, 64, 88, 112 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
67 
// Channel counts 1 through 7, i.e. fewer channels than cr = 8.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
78 
// Channel counts 9 through 15, i.e. more than cr = 8 but fewer than 16.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
89 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
101 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
113 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
125 
// Width-3 runs for every step in [2, 9] at channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
140 
// Width-5 runs with output_stride 43 at channel counts 1, 8, 15, 22, 29, 36.
// The loop variable is now passed to channels(); previously channels(8) was
// hard-coded, so the sweep repeated one configuration six times. The largest
// channel count (36) stays below the output stride (43).
// NOTE(review): this file is generated; mirror the fix in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
153 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
166 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
179 
// Channel counts 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
191 
// For each zero_index in [0, 9), runs channel counts 16, 40, 64, 88, 112
// with input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
206 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
207 
208 
209 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
    xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
218 
// Channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
229 
// Channel counts 16, 40, 64, 88, 112 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
241 
// Channel counts 16, 40, 64, 88, 112 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
253 
// Channel counts 1 through 7, i.e. fewer channels than cr = 8.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
264 
// Channel counts 9 through 15, i.e. more than cr = 8 but fewer than 16.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
275 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
287 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
299 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
311 
// Width-3 runs for every step in [2, 9] at channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
326 
// Width-5 runs with output_stride 43 at channel counts 1, 8, 15, 22, 29, 36.
// The loop variable is now passed to channels(); previously channels(8) was
// hard-coded, so the sweep repeated one configuration six times. The largest
// channel count (36) stays below the output stride (43).
// NOTE(review): this file is generated; mirror the fix in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
339 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
352 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
365 
// Channel counts 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
377 
// For each zero_index in [0, 9), runs channel counts 16, 40, 64, 88, 112
// with input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
392 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
393 
394 
395 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(9).channels(8);
  tester.Test(
    xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
404 
// Channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
415 
// Channel counts 16, 40, 64, 88, 112 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
427 
// Channel counts 16, 40, 64, 88, 112 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
439 
// Channel counts 1 through 7, i.e. fewer channels than cr = 8.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
450 
// Channel counts 9 through 15, i.e. more than cr = 8 but fewer than 16.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
461 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
473 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
485 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
497 
// Width-3 runs for every step in [2, 9] at channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 9; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).width(3).step(s);
      tester.Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
512 
// Width-5 runs with output_stride 43 at channel counts 1, 8, 15, 22, 29, 36.
// The loop variable is now passed to channels(); previously channels(8) was
// hard-coded, so the sweep repeated one configuration six times. The largest
// channel count (36) stays below the output stride (43).
// NOTE(review): this file is generated; mirror the fix in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
525 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
538 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
551 
// Channel counts 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(9).channels(c).input_offset(176);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
563 
// For each zero_index in [0, 9), runs channel counts 16, 40, 64, 88, 112
// with input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 16; c < 128; c += 24) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(9).channels(c).input_offset(176).zero_index(zi);
      tester.Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
578 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
579 
580 
581 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (8), kr = 25.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = DWConvMicrokernelTester();
  tester.cr(8).kr(25).channels(8);
  tester.Test(
    xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
590 
// Channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
601 
// Channel counts 16, 40, 64, 88, 112 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
613 
// Channel counts 16, 40, 64, 88, 112 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 16; c < 128; c += 24) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
625 
// Channel counts 1 through 7, i.e. fewer channels than cr = 8.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 8; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
636 
// Channel counts 9 through 15, i.e. more than cr = 8 but fewer than 16.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
647 
// Channel counts 9 through 15 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
659 
// Channel counts 9 through 15 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 9; c < 16; ++c) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
671 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
683 
// Width-3 runs for every step in [2, 25] at channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; ++s) {
      auto tester = DWConvMicrokernelTester();
      tester.cr(8).kr(25).channels(c).width(3).step(s);
      tester.Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
698 
// Width-5 runs with output_stride 43 at channel counts 1, 8, 15, 22, 29, 36.
// The loop variable is now passed to channels(); previously channels(8) was
// hard-coded, so the sweep repeated one configuration six times. The largest
// channel count (36) stays below the output stride (43).
// NOTE(review): this file is generated; mirror the fix in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
711 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
724 
// Width-3 runs at channel counts 1, 8, 15, 22, 29, 36 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 40; c += 7) {
    auto tester = DWConvMicrokernelTester();
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
737 
// Sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 16; ch < 128; ch += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).input_offset(176).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
749 
// For each zero_index from 0 through 24, sweeps channels over
// 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t ch = 16; ch < 128; ch += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(ch).input_offset(176).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
764 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
765 
766 
767 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(8), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
776 
// Sweeps channels over 16, 40, 64, 88, 112 (all multiples of 8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 16; ch < 128; ch += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
787 
// Sweeps channels over 16, 40, 64, 88, 112 (all multiples of 8) with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 16; ch < 128; ch += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
799 
// Sweeps channels over 16, 40, 64, 88, 112 (all multiples of 8) with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 16; ch < 128; ch += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
811 
// Sweeps every channel count from 1 through 7 (all below 8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 1; ch < 8; ++ch) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
822 
// Sweeps every channel count from 9 through 15 (strictly between 8 and 16).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 9; ch < 16; ++ch) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
833 
// Sweeps every channel count from 9 through 15 with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 9; ch < 16; ++ch) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
845 
// Sweeps every channel count from 9 through 15 with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 9; ch < 16; ++ch) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
857 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 40; ch += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).width(3).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
869 
// For each channel count in 1, 8, 15, 22, 29, 36, tries every step value from
// 2 through 25 with width(3).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 40; ch += 7) {
    for (size_t st = 2; st <= 25; ++st) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(ch).width(3).step(st).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
884 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(5) and
// output_stride(43).
//
// Fix: the loop variable used to be ignored (every iteration passed the
// literal channels(8)), so the same configuration was simply repeated six
// times. The swept value is now forwarded to channels(); its maximum (36)
// stays below the output_stride value (43).
// NOTE(review): the file header states this file is generated by
// tools/generate-dwconv-test.py; mirror this change in the generator so it
// survives regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
897 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 40; ch += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
910 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 40; ch += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
923 
// Sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 16; ch < 128; ch += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).input_offset(176).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
935 
// For each zero_index from 0 through 24, sweeps channels over
// 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t ch = 16; ch < 128; ch += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(ch).input_offset(176).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
950 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
951 
952 
953 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(8), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(8).kr(25).channels(8).Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
962 
// Sweeps channels over 16, 40, 64, 88, 112 (all multiples of 8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 16; ch < 128; ch += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
973 
// Sweeps channels over 16, 40, 64, 88, 112 (all multiples of 8) with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 16; ch < 128; ch += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
985 
// Sweeps channels over 16, 40, 64, 88, 112 (all multiples of 8) with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 16; ch < 128; ch += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
997 
// Sweeps every channel count from 1 through 7 (all below 8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 1; ch < 8; ++ch) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1008 
// Sweeps every channel count from 9 through 15 (strictly between 8 and 16).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 9; ch < 16; ++ch) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1019 
// Sweeps every channel count from 9 through 15 with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 9; ch < 16; ++ch) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1031 
// Sweeps every channel count from 9 through 15 with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 9; ch < 16; ++ch) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1043 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 40; ch += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).width(3).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1055 
// For each channel count in 1, 8, 15, 22, 29, 36, tries every step value from
// 2 through 25 with width(3).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 40; ch += 7) {
    for (size_t st = 2; st <= 25; ++st) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(ch).width(3).step(st).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
1070 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(5) and
// output_stride(43).
//
// Fix: the loop variable used to be ignored (every iteration passed the
// literal channels(8)), so the same configuration was simply repeated six
// times. The swept value is now forwarded to channels(); its maximum (36)
// stays below the output_stride value (43).
// NOTE(review): the file header states this file is generated by
// tools/generate-dwconv-test.py; mirror this change in the generator so it
// survives regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1083 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 40; ch += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1096 
// Sweeps channels over 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 40; ch += 7) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1109 
// Sweeps channels over 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 16; ch < 128; ch += 24) {
    DWConvMicrokernelTester().cr(8).kr(25).channels(ch).input_offset(176).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1121 
// For each zero_index from 0 through 24, sweeps channels over
// 16, 40, 64, 88, 112 with input_offset(176).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t ch = 16; ch < 128; ch += 24) {
      DWConvMicrokernelTester().cr(8).kr(25).channels(ch).input_offset(176).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
1136 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1137 
1138 
1139 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(16), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
1148 
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1159 
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of 16) with
// qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1171 
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of 16) with
// qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1183 
// Sweeps every channel count from 1 through 15 (all below 16).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 1; ch < 16; ++ch) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1194 
// Sweeps every channel count from 17 through 31 (strictly between 16 and 32).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1205 
// Sweeps every channel count from 17 through 31 with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1217 
// Sweeps every channel count from 17 through 31 with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1229 
// Sweeps channels over 1, 16, 31, 46, 61, 76 with width(3).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).width(3).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1241 
// For each channel count in 1, 16, 31, 46, 61, 76, tries every step value
// from 2 through 9 with width(3).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    for (size_t st = 2; st <= 9; ++st) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(ch).width(3).step(st).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
1256 
// Sweeps channels over 1, 16, 31, 46, 61, 76 with width(5) and
// output_stride(83).
//
// Fix: the loop variable used to be ignored (every iteration passed the
// literal channels(16)), so the same configuration was simply repeated six
// times. The swept value is now forwarded to channels(); its maximum (76)
// stays below the output_stride value (83).
// NOTE(review): the file header states this file is generated by
// tools/generate-dwconv-test.py; mirror this change in the generator so it
// survives regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1269 
// Sweeps channels over 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1282 
// Sweeps channels over 1, 16, 31, 46, 61, 76 with width(3) and qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1295 
// Sweeps channels over 32, 80, 128, 176, 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1307 
// For each zero_index from 0 through 8, sweeps channels over
// 32, 80, 128, 176, 224 with input_offset(304).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t ch = 32; ch < 256; ch += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(ch).input_offset(304).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
1322 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1323 
1324 
1325 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels(16), the same value that is passed to cr().
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16).Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
1334 
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of 16).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1345 
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of 16) with
// qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1357 
// Sweeps channels over 32, 80, 128, 176, 224 (all multiples of 16) with
// qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 32; ch < 256; ch += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1369 
// Sweeps every channel count from 1 through 15 (all below 16).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 1; ch < 16; ++ch) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1380 
// Sweeps every channel count from 17 through 31 (strictly between 16 and 32).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1391 
// Sweeps every channel count from 17 through 31 with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1403 
// Sweeps every channel count from 17 through 31 with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ch = 17; ch < 32; ++ch) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1415 
// Sweeps channels over 1, 16, 31, 46, 61, 76 with width(3).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(ch).width(3).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1427 
// For each channel count in 1, 16, 31, 46, 61, 76, tries every step value
// from 2 through 9 with width(3).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ch = 1; ch <= 80; ch += 15) {
    for (size_t st = 2; st <= 9; ++st) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(ch).width(3).step(st).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
1442 
// Width-5 runs with an output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61, 76.
//
// The loop variable used to be ignored (the tester was always configured with
// channels(16)), so all six iterations exercised the same configuration. It is
// now forwarded to the tester so each iteration covers a distinct channel
// count. The largest swept value (76) stays below the output stride of 83.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or regeneration will revert it.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1455 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1468 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1481 
// Channel counts 32, 80, 128, 176, 224 with an input offset of 304.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .input_offset(304)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1493 
// For every zero_index in 0..8, runs channel counts 32, 80, 128, 176, 224 with
// an input offset of 304.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 32; c <= 255; c += 48) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(c)
          .input_offset(304)
          .zero_index(zi)
          .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
1508 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1509 
1510 
1511 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (16).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
      .cr(16).kr(9).channels(16)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
1520 
// Channel counts that are multiples of cr (16): 32, 80, 128, 176, 224.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1531 
// Channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1543 
// Channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1555 
// Every channel count below cr (16): 1..15.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1566 
// Channel counts 17..31 (more than cr = 16, fewer than 2 * cr).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1577 
// Channel counts 17..31 (more than cr = 16, fewer than 2 * cr) with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1589 
// Channel counts 17..31 (more than cr = 16, fewer than 2 * cr) with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1601 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1613 
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
1628 
// Width-5 runs with an output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61, 76.
//
// The loop variable used to be ignored (the tester was always configured with
// channels(16)), so all six iterations exercised the same configuration. It is
// now forwarded to the tester so each iteration covers a distinct channel
// count. The largest swept value (76) stays below the output stride of 83.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or regeneration will revert it.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1641 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1654 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1667 
// Channel counts 32, 80, 128, 176, 224 with an input offset of 304.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .input_offset(304)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1679 
// For every zero_index in 0..8, runs channel counts 32, 80, 128, 176, 224 with
// an input offset of 304.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 32; c <= 255; c += 48) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(c)
          .input_offset(304)
          .zero_index(zi)
          .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
1694 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1695 
1696 
1697 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (16).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
      .cr(16).kr(9).channels(16)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
1706 
// Channel counts that are multiples of cr (16): 32, 80, 128, 176, 224.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1717 
// Channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1729 
// Channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1741 
// Every channel count below cr (16): 1..15.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1752 
// Channel counts 17..31 (more than cr = 16, fewer than 2 * cr).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1763 
// Channel counts 17..31 (more than cr = 16, fewer than 2 * cr) with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1775 
// Channel counts 17..31 (more than cr = 16, fewer than 2 * cr) with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1787 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1799 
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
1814 
// Width-5 runs with an output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61, 76.
//
// The loop variable used to be ignored (the tester was always configured with
// channels(16)), so all six iterations exercised the same configuration. It is
// now forwarded to the tester so each iteration covers a distinct channel
// count. The largest swept value (76) stays below the output stride of 83.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or regeneration will revert it.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1827 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1840 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1853 
// Channel counts 32, 80, 128, 176, 224 with an input offset of 304.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .input_offset(304)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1865 
// For every zero_index in 0..8, runs channel counts 32, 80, 128, 176, 224 with
// an input offset of 304.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 32; c <= 255; c += 48) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(c)
          .input_offset(304)
          .zero_index(zi)
          .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
1880 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1881 
1882 
1883 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (16).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
      .cr(16).kr(9).channels(16)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
1892 
// Channel counts that are multiples of cr (16): 32, 80, 128, 176, 224.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1903 
// Channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1915 
// Channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1927 
// Every channel count below cr (16): 1..15.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1938 
// Channel counts 17..31 (more than cr = 16, fewer than 2 * cr).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1949 
// Channel counts 17..31 (more than cr = 16, fewer than 2 * cr) with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1961 
// Channel counts 17..31 (more than cr = 16, fewer than 2 * cr) with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1973 
// Width-3 runs over channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
1985 
// Width-3 runs over channel counts 1, 16, ..., 76, each repeated for every
// step value from 2 through 9.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    for (size_t s = 2; s < 10; ++s) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(c)
          .width(3)
          .step(s)
          .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
2000 
// Width-5 runs with an output stride of 83, swept over channel counts
// 1, 16, 31, 46, 61, 76.
//
// The loop variable used to be ignored (the tester was always configured with
// channels(16)), so all six iterations exercised the same configuration. It is
// now forwarded to the tester so each iteration covers a distinct channel
// count. The largest swept value (76) stays below the output stride of 83.
// NOTE(review): this file is generated by tools/generate-dwconv-test.py; the
// generator template needs the same change or regeneration will revert it.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
2013 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmin(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2026 
// Width-3 runs over channel counts 1, 16, ..., 76 with qmax(128).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 81; c += 15) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .width(3)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2039 
// Channel counts 32, 80, 128, 176, 224 with an input offset of 304.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(9).channels(c)
        .input_offset(304)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2051 
// For every zero_index in 0..8, runs channel counts 32, 80, 128, 176, 224 with
// an input offset of 304.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi <= 8; ++zi) {
    for (uint32_t c = 32; c <= 255; c += 48) {
      DWConvMicrokernelTester()
          .cr(16).kr(9).channels(c)
          .input_offset(304)
          .zero_index(zi)
          .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
2066 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2067 
2068 
2069 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with the channel count equal to cr (16), kr = 25.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
      .cr(16).kr(25).channels(16)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
2078 
// Channel counts that are multiples of cr (16): 32, 80, 128, 176, 224; kr = 25.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2089 
// Channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with qmin(128);
// kr = 25.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c)
        .qmin(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2101 
// Channel counts 32, 80, 128, 176, 224 (multiples of cr = 16) with qmax(128);
// kr = 25.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c <= 255; c += 48) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c)
        .qmax(128)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2113 
// Every channel count below cr (16): 1..15; kr = 25.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c <= 15; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2124 
// Channel counts 17..31 (more than cr = 16, fewer than 2 * cr); kr = 25.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c <= 31; ++c) {
    DWConvMicrokernelTester()
        .cr(16).kr(25).channels(c)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2135 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31 with qmin(128).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2147 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31 with qmax(128).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2159 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2171 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // For channel counts 1, 16, 31, 46, 61, 76: run width(3) with every step from 2 through 25.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2186 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
  // The channel count comes from the loop variable; with a fixed channels(16) every
  // iteration would repeat the same configuration. The largest count swept (76) is
  // still smaller than the output stride (83).
  // NOTE(review): the file header names tools/generate-dwconv-test.py as the generator;
  // apply the same change to its template so regeneration keeps this fix.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
2199 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2212 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmax(128).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2225 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2237 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  // For each zero_index 0 through 24: channel counts 32, 80, 128, 176, 224 with input_offset(304).
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2252 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2253 
2254 
2255 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with the channel count equal to the cr value 16.
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
2264 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224, all multiples of the cr value 16.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2275 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 (multiples of the cr value 16) with qmin(128).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2287 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 (multiples of the cr value 16) with qmax(128).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2299 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 1 through 15, i.e. below the cr value 16.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2310 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31, i.e. above the cr value 16 and below 32.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2321 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31 with qmin(128).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2333 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31 with qmax(128).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2345 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2357 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // For channel counts 1, 16, 31, 46, 61, 76: run width(3) with every step from 2 through 25.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2372 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
  // The channel count comes from the loop variable; with a fixed channels(16) every
  // iteration would repeat the same configuration. The largest count swept (76) is
  // still smaller than the output stride (83).
  // NOTE(review): the file header names tools/generate-dwconv-test.py as the generator;
  // apply the same change to its template so regeneration keeps this fix.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
2385 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2398 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmax(128).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2411 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2423 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  // For each zero_index 0 through 24: channel counts 32, 80, 128, 176, 224 with input_offset(304).
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2438 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2439 
2440 
2441 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with the channel count equal to the cr value 16.
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
2450 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224, all multiples of the cr value 16.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2461 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 (multiples of the cr value 16) with qmin(128).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2473 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 (multiples of the cr value 16) with qmax(128).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2485 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 1 through 15, i.e. below the cr value 16.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2496 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31, i.e. above the cr value 16 and below 32.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2507 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31 with qmin(128).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2519 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31 with qmax(128).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2531 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2543 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // For channel counts 1, 16, 31, 46, 61, 76: run width(3) with every step from 2 through 25.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2558 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
  // The channel count comes from the loop variable; with a fixed channels(16) every
  // iteration would repeat the same configuration. The largest count swept (76) is
  // still smaller than the output stride (83).
  // NOTE(review): the file header names tools/generate-dwconv-test.py as the generator;
  // apply the same change to its template so regeneration keeps this fix.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
2571 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2584 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmax(128).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2597 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2609 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  // For each zero_index 0 through 24: channel counts 32, 80, 128, 176, 224 with input_offset(304).
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2624 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2625 
2626 
2627 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with the channel count equal to the cr value 16.
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
2636 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224, all multiples of the cr value 16.
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2647 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 (multiples of the cr value 16) with qmin(128).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2659 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 (multiples of the cr value 16) with qmax(128).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2671 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 1 through 15, i.e. below the cr value 16.
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2682 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31, i.e. above the cr value 16 and below 32.
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2693 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31 with qmin(128).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2705 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Every channel count from 17 through 31 with qmax(128).
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2717 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76, each run with width(3).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2729 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // For channel counts 1, 16, 31, 46, 61, 76: run width(3) with every step from 2 through 25.
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2744 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(5) and output_stride(83).
  // The channel count comes from the loop variable; with a fixed channels(16) every
  // iteration would repeat the same configuration. The largest count swept (76) is
  // still smaller than the output stride (83).
  // NOTE(review): the file header names tools/generate-dwconv-test.py as the generator;
  // apply the same change to its template so regeneration keeps this fix.
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
2757 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2770 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmax(128).
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2783 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Channel counts 32, 80, 128, 176, 224 with input_offset(304).
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2795 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, zero) {
  TEST_REQUIRES_ARM_NEON;
  // For each zero_index 0 through 24: channel counts 32, 80, 128, 176, 224 with input_offset(304).
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zi).Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2810 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2811 
2812 
2813 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single run with the channel count equal to the cr value 16.
  DWConvMicrokernelTester().cr(16).kr(25).channels(16).Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
2822 
// Channel counts that are multiples of 16: 32, 80, 128, 176, 224.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2833 
// qmin(128) across channel counts that are multiples of 16: 32, 80, 128, 176, 224.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2845 
// qmax(128) across channel counts that are multiples of 16: 32, 80, 128, 176, 224.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2857 
// Every channel count below cr: 1 through 15.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 16; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2868 
// Every channel count strictly between cr and 2*cr: 17 through 31.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2879 
// qmin(128) for every channel count 17 through 31.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2891 
// qmax(128) for every channel count 17 through 31.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 17; c < 32; ++c) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2903 
// width(3) across channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2915 
// width(3): channel counts 1, 16, 31, 46, 61, 76 (outer loop) crossed with
// step values 2 through 25 (inner loop).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
2930 
// width(5) with output_stride(83), across channel counts 1, 16, 31, 46, 61, 76.
// The loop variable is forwarded to channels(); previously channels(16) was
// hard-coded, so all six iterations ran the same configuration. The stride of
// 83 is larger than the biggest channel count in the sweep (76).
// NOTE: the file header says this file is generated by
// tools/generate-dwconv-test.py; mirror this change in the generator or it
// will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
2943 
// width(3) with qmin(128), across channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2956 
// width(3) with qmax(128), across channel counts 1, 16, 31, 46, 61, 76.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2969 
// input_offset(304) across channel counts 32, 80, 128, 176, 224.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
2981 
// For each zero_index 0..24 (outer loop), runs input_offset(304) across
// channel counts 32, 80, 128, 176, 224 (inner loop).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester().cr(16).kr(25).channels(c).input_offset(304).zero_index(zi)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
2996 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2997 
2998 
2999 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to cr (24).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(24).kr(9).channels(24)
    .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
3008 
// Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3019 
// qmin(128) across channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3031 
// qmax(128) across channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3043 
// Every channel count below cr: 1 through 23.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3054 
// Every channel count strictly between cr and 2*cr: 25 through 47.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3065 
// qmin(128) for every channel count 25 through 47.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3077 
// qmax(128) for every channel count 25 through 47.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3089 
// width(3) across channel counts 1, 24, 47, 70, 93, 116.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3101 
// width(3): channel counts 1, 24, 47, 70, 93, 116 (outer loop) crossed with
// step values 2 through 9 (inner loop).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
3116 
// width(5) with output_stride(127), across channel counts 1, 24, 47, 70, 93, 116.
// The loop variable is forwarded to channels(); previously channels(24) was
// hard-coded, so all six iterations ran the same configuration. The stride of
// 127 is larger than the biggest channel count in the sweep (116).
// NOTE: the file header says this file is generated by
// tools/generate-dwconv-test.py; mirror this change in the generator or it
// will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3129 
// width(3) with qmin(128), across channel counts 1, 24, 47, 70, 93, 116.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3142 
// width(3) with qmax(128), across channel counts 1, 24, 47, 70, 93, 116.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3155 
// input_offset(464) across channel counts 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3167 
// For each zero_index 0..8 (outer loop), runs input_offset(464) across
// channel counts 48, 120, 192, 264, 336 (inner loop).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 9; ++zi) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(9).channels(c).input_offset(464).zero_index(zi)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
3182 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3183 
3184 
3185 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to cr (24).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(24).kr(25).channels(24)
    .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
3194 
// Channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3205 
// qmin(128) across channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3217 
// qmax(128) across channel counts that are multiples of 24: 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3229 
// Every channel count below cr: 1 through 23.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 24; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3240 
// Every channel count strictly between cr and 2*cr: 25 through 47.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3251 
// qmin(128) for every channel count 25 through 47.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3263 
// qmax(128) for every channel count 25 through 47.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 25; c < 48; ++c) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3275 
// width(3) across channel counts 1, 24, 47, 70, 93, 116.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3287 
// width(3): channel counts 1, 24, 47, 70, 93, 116 (outer loop) crossed with
// step values 2 through 25 (inner loop).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    for (size_t s = 2; s <= 25; ++s) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
3302 
// width(5) with output_stride(127), across channel counts 1, 24, 47, 70, 93, 116.
// The loop variable is forwarded to channels(); previously channels(24) was
// hard-coded, so all six iterations ran the same configuration. The stride of
// 127 is larger than the biggest channel count in the sweep (116).
// NOTE: the file header says this file is generated by
// tools/generate-dwconv-test.py; mirror this change in the generator or it
// will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3315 
// width(3) with qmin(128), across channel counts 1, 24, 47, 70, 93, 116.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3328 
// width(3) with qmax(128), across channel counts 1, 24, 47, 70, 93, 116.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 120; c += 23) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3341 
// input_offset(464) across channel counts 48, 120, 192, 264, 336.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 48; c < 384; c += 72) {
    DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3353 
// For each zero_index 0..24 (outer loop), runs input_offset(464) across
// channel counts 48, 120, 192, 264, 336 (inner loop).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t zi = 0; zi < 25; ++zi) {
    for (uint32_t c = 48; c < 384; c += 72) {
      DWConvMicrokernelTester().cr(24).kr(25).channels(c).input_offset(464).zero_index(zi)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
3368 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3369 
3370 
3371 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with channels equal to cr (32).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester().cr(32).kr(9).channels(32)
    .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
3380 
// Channel counts that are multiples of 32: 64, 160, 256, 352, 448.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3391 
// qmin(128) across channel counts that are multiples of 32: 64, 160, 256, 352, 448.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3403 
// qmax(128) across channel counts that are multiples of 32: 64, 160, 256, 352, 448.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 64; c < 512; c += 96) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3415 
// Every channel count below cr: 1 through 31.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 1; c < 32; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3426 
// Every channel count strictly between cr and 2*cr: 33 through 63.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3437 
// qmin(128) for every channel count 33 through 63.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3449 
// qmax(128) for every channel count 33 through 63.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t c = 33; c < 64; ++c) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3461 
// width(3) across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3473 
// width(3): channel counts 1, 32, 63, 94, 125, 156 (outer loop) crossed with
// step values 2 through 9 (inner loop).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    for (size_t s = 2; s <= 9; ++s) {
      DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).step(s)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
3488 
// width(5) with output_stride(163), across channel counts 1, 32, 63, 94, 125, 156.
// The loop variable is forwarded to channels(); previously channels(32) was
// hard-coded, so all six iterations ran the same configuration. The stride of
// 163 is larger than the biggest channel count in the sweep (156).
// NOTE: the file header says this file is generated by
// tools/generate-dwconv-test.py; mirror this change in the generator or it
// will be lost on regeneration.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3501 
// width(3) with qmin(128), across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3514 
// width(3) with qmax(128), across channel counts 1, 32, 63, 94, 125, 156.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c <= 160; c += 31) {
    DWConvMicrokernelTester().cr(32).kr(9).channels(c).width(3).qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
3527 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, input_offset) {
  // Runs the up32x9 NEON MUL16 microkernel with input_offset set to 592 for
  // channel counts 64, 160, 256, 352 and 448 (all multiples of 32).
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kChannelBound = 512;
  constexpr uint32_t kChannelIncrement = 96;
  for (uint32_t c = 64; c < kChannelBound; c += kChannelIncrement) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(9)
      .channels(c)
      .input_offset(592)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3539 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, zero) {
  // Runs the up32x9 NEON MUL16 microkernel with input_offset 592 for every
  // zero_index in [0, 8] and channel counts 64, 160, 256, 352 and 448.
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kZeroIndexCount = 9;
  constexpr uint32_t kChannelBound = 512;
  constexpr uint32_t kChannelIncrement = 96;
  for (uint32_t zero_idx = 0; zero_idx < kZeroIndexCount; ++zero_idx) {
    for (uint32_t c = 64; c < kChannelBound; c += kChannelIncrement) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(9)
        .channels(c)
        .input_offset(592)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
3554 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3555 
3556 
3557 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_eq_32) {
  // Single run of the up32x25 NEON MUL16 microkernel with exactly 32 channels.
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
    .cr(32)
    .kr(25)
    .channels(32)
    .Test(
      xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
3566 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32) {
  // Runs the up32x25 NEON MUL16 microkernel for channel counts 64, 160, 256,
  // 352 and 448 (all multiples of 32).
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kChannelBound = 512;
  constexpr uint32_t kChannelIncrement = 96;
  for (uint32_t c = 64; c < kChannelBound; c += kChannelIncrement) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3577 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
  // Runs the up32x25 NEON MUL16 microkernel with qmin set to 128 for channel
  // counts 64, 160, 256, 352 and 448 (all multiples of 32).
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kChannelBound = 512;
  constexpr uint32_t kChannelIncrement = 96;
  for (uint32_t c = 64; c < kChannelBound; c += kChannelIncrement) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3589 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
  // Runs the up32x25 NEON MUL16 microkernel with qmax set to 128 for channel
  // counts 64, 160, 256, 352 and 448 (all multiples of 32).
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kChannelBound = 512;
  constexpr uint32_t kChannelIncrement = 96;
  for (uint32_t c = 64; c < kChannelBound; c += kChannelIncrement) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3601 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_lt_32) {
  // Runs the up32x25 NEON MUL16 microkernel for every channel count from 1 to
  // 31, i.e. all counts below 32.
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kChannelBound = 32;
  for (uint32_t c = 1; c < kChannelBound; ++c) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3612 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32) {
  // Runs the up32x25 NEON MUL16 microkernel for every channel count from 33
  // to 63.
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kChannelBound = 64;
  for (uint32_t c = 33; c < kChannelBound; ++c) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3623 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
  // Runs the up32x25 NEON MUL16 microkernel with qmin set to 128 for every
  // channel count from 33 to 63.
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kChannelBound = 64;
  for (uint32_t c = 33; c < kChannelBound; ++c) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3635 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
  // Runs the up32x25 NEON MUL16 microkernel with qmax set to 128 for every
  // channel count from 33 to 63.
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kChannelBound = 64;
  for (uint32_t c = 33; c < kChannelBound; ++c) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3647 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel) {
  // Runs the up32x25 NEON MUL16 microkernel with width 3 across channel
  // counts 1, 32, 63, 94, 125 and 156.
  TEST_REQUIRES_ARM_NEON;
  constexpr size_t kChannelLimit = 160;
  constexpr size_t kChannelIncrement = 31;
  for (size_t c = 1; c <= kChannelLimit; c += kChannelIncrement) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3659 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_step) {
  // Runs the up32x25 NEON MUL16 microkernel with width 3 for every step value
  // in [2, 25], across channel counts 1, 32, 63, 94, 125 and 156.
  TEST_REQUIRES_ARM_NEON;
  constexpr size_t kChannelLimit = 160;
  constexpr size_t kChannelIncrement = 31;
  constexpr size_t kLastStep = 25;
  for (size_t c = 1; c <= kChannelLimit; c += kChannelIncrement) {
    for (size_t s = 2; s <= kLastStep; ++s) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(25)
        .channels(c)
        .width(3)
        .step(s)
        .Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
3674 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
  // Runs the up32x25 NEON MUL16 microkernel with width 5 and an output stride
  // of 163 across channel counts 1, 32, 63, 94, 125 and 156.
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      // Use the loop variable so each iteration exercises a distinct channel
      // count; the output stride (163) is larger than every value swept here.
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3687 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_qmin) {
  // Runs the up32x25 NEON MUL16 microkernel with width 3 and qmin set to 128,
  // across channel counts 1, 32, 63, 94, 125 and 156.
  TEST_REQUIRES_ARM_NEON;
  constexpr size_t kChannelLimit = 160;
  constexpr size_t kChannelIncrement = 31;
  for (size_t c = 1; c <= kChannelLimit; c += kChannelIncrement) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .width(3)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3700 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_qmax) {
  // Runs the up32x25 NEON MUL16 microkernel with width 3 and qmax set to 128,
  // across channel counts 1, 32, 63, 94, 125 and 156.
  TEST_REQUIRES_ARM_NEON;
  constexpr size_t kChannelLimit = 160;
  constexpr size_t kChannelIncrement = 31;
  for (size_t c = 1; c <= kChannelLimit; c += kChannelIncrement) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .width(3)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3713 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, input_offset) {
  // Runs the up32x25 NEON MUL16 microkernel with input_offset set to 592 for
  // channel counts 64, 160, 256, 352 and 448 (all multiples of 32).
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kChannelBound = 512;
  constexpr uint32_t kChannelIncrement = 96;
  for (uint32_t c = 64; c < kChannelBound; c += kChannelIncrement) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(c)
      .input_offset(592)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
3725 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, zero) {
  // Runs the up32x25 NEON MUL16 microkernel with input_offset 592 for every
  // zero_index in [0, 24] and channel counts 64, 160, 256, 352 and 448.
  TEST_REQUIRES_ARM_NEON;
  constexpr uint32_t kZeroIndexCount = 25;
  constexpr uint32_t kChannelBound = 512;
  constexpr uint32_t kChannelIncrement = 96;
  for (uint32_t zero_idx = 0; zero_idx < kZeroIndexCount; ++zero_idx) {
    for (uint32_t c = 64; c < kChannelBound; c += kChannelIncrement) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(25)
        .channels(c)
        .input_offset(592)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
3740 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3741