• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 //   Specification: test/f16-dwconv-minmax.yaml
11 //   Generator: tools/generate-dwconv-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18 
19 #include <xnnpack/dwconv.h>
20 #include "dwconv-microkernel-tester.h"
21 
22 
23 #if XNN_ARCH_ARM64
// Runs the up8x25 NEON FP16 kernel once with channels == cr == 8 and kr == 25.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
}
32 
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr == 8).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
43 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmin(128) set on the tester.
// NOTE(review): qmin presumably controls the lower output clamp -- confirm in the tester header.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
55 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmax(128) set on the tester.
// NOTE(review): qmax presumably controls the upper output clamp -- confirm in the tester header.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
67 
// Covers every channel count below cr: 1 through 7.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
78 
// Covers channel counts strictly between cr and 2*cr: 9 through 15.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
89 
// Channel counts 9 through 15 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
101 
// Channel counts 9 through 15 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
113 
// Width-3 runs for channel counts 1, 8, 15, 22, 29, 36.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
125 
// Width-3 runs for channel counts 1, 8, ..., 36 crossed with step values 2 through 25.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
    }
  }
}
140 
// Width-5 runs with output_stride(43) for channel counts 1, 8, 15, 22, 29, 36.
// The loop value is passed to channels() so that each iteration exercises a
// distinct channel count; with a constant channels(8) the loop variable would be
// unused and all six iterations would run the same configuration. Every swept
// channel count stays below the output stride of 43.
// NOTE(review): this file is auto-generated; the same change presumably belongs
// in the generator template (tools/generate-dwconv-test.py) -- confirm.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
153 
// Width-3 runs for channel counts 1, 8, ..., 36 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
166 
// Width-3 runs for channel counts 1, 8, ..., 36 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
179 
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset(176) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
  }
}
191 
// For each zero_index 0 through 24 (one per kr position), sweeps channel counts
// 16, 40, 64, 88, 112 with input_offset(176).
// NOTE(review): zero_index presumably selects which input row is the zero buffer -- confirm.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith);
    }
  }
}
206 #endif  // XNN_ARCH_ARM64
207 
208 
209 #if XNN_ARCH_ARM64
// Runs the up8x25 NEON FP16 acc2 kernel once with channels == cr == 8 and kr == 25.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(25).channels(8);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
}
218 
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr == 8).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
229 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
241 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
253 
// Covers every channel count below cr: 1 through 7.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
264 
// Covers channel counts strictly between cr and 2*cr: 9 through 15.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
275 
// Channel counts 9 through 15 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
287 
// Channel counts 9 through 15 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
299 
// Width-3 runs for channel counts 1, 8, 15, 22, 29, 36.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
311 
// Width-3 runs for channel counts 1, 8, ..., 36 crossed with step values 2 through 25.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
    }
  }
}
326 
// Width-5 runs with output_stride(43) for channel counts 1, 8, 15, 22, 29, 36.
// The loop value is passed to channels() so that each iteration exercises a
// distinct channel count; with a constant channels(8) the loop variable would be
// unused and all six iterations would run the same configuration. Every swept
// channel count stays below the output stride of 43.
// NOTE(review): this file is auto-generated; the same change presumably belongs
// in the generator template (tools/generate-dwconv-test.py) -- confirm.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
339 
// Width-3 runs for channel counts 1, 8, ..., 36 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
352 
// Width-3 runs for channel counts 1, 8, ..., 36 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 40; c += 7) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
365 
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset(176) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(25).channels(c).input_offset(176);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
  }
}
377 
// For each zero_index 0 through 24 (one per kr position), sweeps channel counts
// 16, 40, 64, 88, 112 with input_offset(176).
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 16; c < 128; c += 24) {
      DWConvMicrokernelTester tester;
      tester.cr(8).kr(25).channels(c).input_offset(176).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2);
    }
  }
}
392 #endif  // XNN_ARCH_ARM64
393 
394 
395 #if XNN_ARCH_ARM64
// Runs the up16x25 NEON FP16 kernel once with channels == cr == 16 and kr == 25.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
}
404 
// Sweeps channel counts 32, 80, 128, 176, 224 (all multiples of cr == 16).
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
415 
// Sweeps channel counts 32, 80, 128, 176, 224 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
427 
// Sweeps channel counts 32, 80, 128, 176, 224 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
439 
// Covers every channel count below cr: 1 through 15.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
450 
// Covers channel counts strictly between cr and 2*cr: 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
461 
// Channel counts 17 through 31 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
473 
// Channel counts 17 through 31 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
485 
// Width-3 runs for channel counts 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
497 
// Width-3 runs for channel counts 1, 16, ..., 76 crossed with step values 2 through 25.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
    }
  }
}
512 
// Width-5 runs with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
// The loop value is passed to channels() so that each iteration exercises a
// distinct channel count; with a constant channels(16) the loop variable would be
// unused and all six iterations would run the same configuration. Every swept
// channel count stays below the output stride of 83.
// NOTE(review): this file is auto-generated; the same change presumably belongs
// in the generator template (tools/generate-dwconv-test.py) -- confirm.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
525 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
538 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
551 
// Sweeps channel counts 32, 80, 128, 176, 224 with input_offset(304) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
  }
}
563 
// For each zero_index 0 through 24 (one per kr position), sweeps channel counts
// 32, 80, 128, 176, 224 with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith);
    }
  }
}
578 #endif  // XNN_ARCH_ARM64
579 
580 
581 #if XNN_ARCH_ARM64
// Runs the up16x25 NEON FP16 acc2 kernel once with channels == cr == 16 and kr == 25.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester tester;
  tester.cr(16).kr(25).channels(16);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
}
590 
// Sweeps channel counts 32, 80, 128, 176, 224 (all multiples of cr == 16).
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
601 
// Sweeps channel counts 32, 80, 128, 176, 224 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
613 
// Sweeps channel counts 32, 80, 128, 176, 224 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
625 
// Covers every channel count below cr: 1 through 15.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
636 
// Covers channel counts strictly between cr and 2*cr: 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
647 
// Channel counts 17 through 31 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
659 
// Channel counts 17 through 31 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 17; c < 32; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
671 
// Width-3 runs for channel counts 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
683 
// Width-3 runs for channel counts 1, 16, ..., 76 crossed with step values 2 through 25.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    for (size_t s = 2; s <= 25; s++) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).width(3).step(s);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
    }
  }
}
698 
// Width-5 runs with output_stride(83) for channel counts 1, 16, 31, 46, 61, 76.
// The loop value is passed to channels() so that each iteration exercises a
// distinct channel count; with a constant channels(16) the loop variable would be
// unused and all six iterations would run the same configuration. Every swept
// channel count stays below the output stride of 83.
// NOTE(review): this file is auto-generated; the same change presumably belongs
// in the generator template (tools/generate-dwconv-test.py) -- confirm.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
711 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
724 
// Width-3 runs for channel counts 1, 16, ..., 76 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t c = 1; c <= 80; c += 15) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).width(3).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
737 
// Sweeps channel counts 32, 80, 128, 176, 224 with input_offset(304) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 32; c < 256; c += 48) {
    DWConvMicrokernelTester tester;
    tester.cr(16).kr(25).channels(c).input_offset(304);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
  }
}
749 
// For each zero_index 0 through 24 (one per kr position), sweeps channel counts
// 32, 80, 128, 176, 224 with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 25; zero_idx++) {
    for (uint32_t c = 32; c < 256; c += 48) {
      DWConvMicrokernelTester tester;
      tester.cr(16).kr(25).channels(c).input_offset(304).zero_index(zero_idx);
      tester.Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2);
    }
  }
}
764 #endif  // XNN_ARCH_ARM64
765 
766 
767 #if XNN_ARCH_ARM64
// Runs the up8x9 NEON FP16 kernel once with channels == cr == 8 and kr == 9.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester tester;
  tester.cr(8).kr(9).channels(8);
  tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
}
776 
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr == 8).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
787 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c).qmin(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
799 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 16; c < 128; c += 24) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c).qmax(128);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
811 
// Covers every channel count below cr: 1 through 7.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 1; c < 8; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
822 
// Covers channel counts strictly between cr and 2*cr: 9 through 15.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t c = 9; c < 16; c++) {
    DWConvMicrokernelTester tester;
    tester.cr(8).kr(9).channels(c);
    tester.Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
833 
// Covers channel counts 9 through 15 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
845 
// Covers channel counts 9 through 15 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
857 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(3).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
869 
// For each channel count in {1, 8, 15, 22, 29, 36}, tries step values 2
// through 9 with width(3).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).step(step_value)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
    }
  }
}
884 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and
// output_stride(43). The stride is at least the largest swept channel count.
//
// Previously the body passed a constant .channels(8), so the loop variable was
// unused and all six iterations ran the same configuration. The loop variable
// is now forwarded to the tester.
// NOTE: the file header marks this file as auto-generated by
// tools/generate-dwconv-test.py; mirror this change in the generator.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
897 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
910 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
923 
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset(176).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
  }
}
935 
// For each zero_index 0 through 8, sweeps channel counts 16, 40, 64, 88, 112
// with input_offset(176).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176).zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith);
    }
  }
}
950 #endif  // XNN_ARCH_ARM64
951 
952 
953 #if XNN_ARCH_ARM64
// Runs the kernel once with the channel count equal to cr (8).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(8).kr(9).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
}
962 
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
973 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
985 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
997 
// Covers every channel count below cr (8): 1 through 7.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
1008 
// Covers channel counts 9 through 15: above cr (8) and below 2 * cr.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
1019 
// Covers channel counts 9 through 15 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
1031 
// Covers channel counts 9 through 15 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
1043 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(3).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
1055 
// For each channel count in {1, 8, 15, 22, 29, 36}, tries step values 2
// through 9 with width(3).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).step(step_value)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
    }
  }
}
1070 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and
// output_stride(43). The stride is at least the largest swept channel count.
//
// Previously the body passed a constant .channels(8), so the loop variable was
// unused and all six iterations ran the same configuration. The loop variable
// is now forwarded to the tester.
// NOTE: the file header marks this file as auto-generated by
// tools/generate-dwconv-test.py; mirror this change in the generator.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
1083 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
1096 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(3) and qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
1109 
// Sweeps channel counts 16, 40, 64, 88, 112 with input_offset(176).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
1121 
// For each zero_index 0 through 8, sweeps channel counts 16, 40, 64, 88, 112
// with input_offset(176).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176).zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
    }
  }
}
1136 #endif  // XNN_ARCH_ARM64
1137 
1138 
1139 #if XNN_ARCH_ARM64
// Runs the kernel once with the channel count equal to cr (16).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
}
1148 
// Sweeps channel counts 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1159 
// Sweeps channel counts 32, 80, 128, 176, 224 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1171 
// Sweeps channel counts 32, 80, 128, 176, 224 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1183 
// Covers every channel count below cr (16): 1 through 15.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1194 
// Covers channel counts 17 through 31: above cr (16) and below 2 * cr.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1205 
// Covers channel counts 17 through 31 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1217 
// Covers channel counts 17 through 31 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1229 
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(3).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1241 
// For each channel count in {1, 16, 31, 46, 61, 76}, tries step values 2
// through 9 with width(3).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).step(step_value)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
    }
  }
}
1256 
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(5) and
// output_stride(83). The stride is at least the largest swept channel count.
//
// Previously the body passed a constant .channels(16), so the loop variable was
// unused and all six iterations ran the same configuration. The loop variable
// is now forwarded to the tester.
// NOTE: the file header marks this file as auto-generated by
// tools/generate-dwconv-test.py; mirror this change in the generator.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1269 
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1282 
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1295 
// Sweeps channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
  }
}
1307 
// For each zero_index 0 through 8, sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith);
    }
  }
}
1322 #endif  // XNN_ARCH_ARM64
1323 
1324 
1325 #if XNN_ARCH_ARM64
// Runs the kernel once with the channel count equal to cr (16).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(16).kr(9).channels(16)
    .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
}
1334 
// Sweeps channel counts 32, 80, 128, 176, 224 (all multiples of cr = 16).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1345 
// Sweeps channel counts 32, 80, 128, 176, 224 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1357 
// Sweeps channel counts 32, 80, 128, 176, 224 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1369 
// Covers every channel count below cr (16): 1 through 15.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1380 
// Covers channel counts 17 through 31: above cr (16) and below 2 * cr.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1391 
// Covers channel counts 17 through 31 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1403 
// Covers channel counts 17 through 31 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1415 
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(3).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1427 
// For each channel count in {1, 16, 31, 46, 61, 76}, tries step values 2
// through 9 with width(3).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_value = 2; step_value <= 9; ++step_value) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).step(step_value)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
    }
  }
}
1442 
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(5) and
// output_stride(83). The stride is at least the largest swept channel count.
//
// Previously the body passed a constant .channels(16), so the loop variable was
// unused and all six iterations ran the same configuration. The loop variable
// is now forwarded to the tester.
// NOTE: the file header marks this file as auto-generated by
// tools/generate-dwconv-test.py; mirror this change in the generator.
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1455 
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmin(128).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1468 
// Sweeps channel counts 1, 16, 31, 46, 61, 76 with width(3) and qmax(128).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1481 
// Sweeps channel counts 32, 80, 128, 176, 224 with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
  }
}
1493 
// For each zero_index 0 through 8, sweeps channel counts 32, 80, 128, 176, 224
// with input_offset(304).
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(9).channels(num_channels).input_offset(304).zero_index(zero_idx)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2);
    }
  }
}
1508 #endif  // XNN_ARCH_ARM64
1509 
1510 
1511 #if XNN_ARCH_ARM64
// Runs the kernel once with the channel count equal to cr (8).
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(8).kr(4).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
}
1520 
// Sweeps channel counts 16, 40, 64, 88, 112 (all multiples of cr = 8).
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1531 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1543 
// Sweeps channel counts 16, 40, 64, 88, 112 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1555 
// Covers every channel count below cr (8): 1 through 7.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1566 
// Covers channel counts 9 through 15: above cr (8) and below 2 * cr.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1577 
// Covers channel counts 9 through 15 with qmin(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1589 
// Covers channel counts 9 through 15 with qmax(128) set on the tester.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1601 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(3).
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1613 
// For each channel count in {1, 8, 15, 22, 29, 36}, tries step values 2
// through 4 with width(3).
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_value = 2; step_value <= 4; ++step_value) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3).step(step_value)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
    }
  }
}
1628 
// Sweeps channel counts 1, 8, 15, 22, 29, 36 with width(5) and
// output_stride(43). The stride is at least the largest swept channel count.
//
// Previously the body passed a constant .channels(8), so the loop variable was
// unused and all six iterations ran the same configuration. The loop variable
// is now forwarded to the tester.
// NOTE: the file header marks this file as auto-generated by
// tools/generate-dwconv-test.py; mirror this change in the generator.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1641 
// Runs the up8x4 kernel with width(3) and qmin(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1654 
// Runs the up8x4 kernel with width(3) and qmax(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1667 
// Runs the up8x4 kernel with input_offset(176) for channel counts that are
// multiples of 8: 16, 40, 64, 88, 112.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).input_offset(176)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
  }
}
1679 
// Runs the up8x4 kernel with input_offset(176) and each zero_index in 0..3
// (one per value below kr = 4), for channel counts 16, 40, 64, 88, 112.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).input_offset(176).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith);
    }
  }
}
1694 #endif  // XNN_ARCH_ARM64
1695 
1696 
1697 #if XNN_ARCH_ARM64
// Single run of the up8x4 acc2 kernel with the channel count equal to cr (8).
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(8).kr(4).channels(8)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
}
1706 
// Runs the up8x4 acc2 kernel for channel counts that are multiples of 8:
// 16, 40, 64, 88, 112.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1717 
// Runs the up8x4 acc2 kernel with qmin(128) for channel counts that are
// multiples of 8: 16, 40, 64, 88, 112.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1729 
// Runs the up8x4 acc2 kernel with qmax(128) for channel counts that are
// multiples of 8: 16, 40, 64, 88, 112.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1741 
// Runs the up8x4 acc2 kernel for channel counts below cr (8): 1 through 7.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1752 
// Runs the up8x4 acc2 kernel for channel counts 9 through 15 (above cr = 8,
// below 2 * cr).
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1763 
// Runs the up8x4 acc2 kernel with qmin(128) for channel counts 9 through 15.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1775 
// Runs the up8x4 acc2 kernel with qmax(128) for channel counts 9 through 15.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1787 
// Runs the up8x4 acc2 kernel with width(3) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1799 
// Runs the up8x4 acc2 kernel with width(3) for every combination of channel
// count (1, 8, 15, 22, 29, 36) and step (2, 3, 4).
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t step_size = 2; step_size <= 4; ++step_size) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3).step(step_size)
          .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
    }
  }
}
1814 
// Runs the up8x4 acc2 kernel with width(5) and output_stride(43) for channel
// counts 1, 8, 15, 22, 29, 36. channels() must receive the loop variable: a
// fixed channels(8) would repeat the identical configuration on every
// iteration. output_stride(43) is larger than the biggest channel count (36).
// NOTE(review): this file is auto-generated; mirror this change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1827 
// Runs the up8x4 acc2 kernel with width(3) and qmin(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1840 
// Runs the up8x4 acc2 kernel with width(3) and qmax(128) for channel counts
// 1, 8, 15, 22, 29, 36.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1853 
// Runs the up8x4 acc2 kernel with input_offset(176) for channel counts that
// are multiples of 8: 16, 40, 64, 88, 112.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).input_offset(176)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
1865 
// Runs the up8x4 acc2 kernel with input_offset(176) and each zero_index in
// 0..3 (one per value below kr = 4), for channel counts 16, 40, 64, 88, 112.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
    for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).input_offset(176).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
    }
  }
}
1880 #endif  // XNN_ARCH_ARM64
1881 
1882 
1883 #if XNN_ARCH_ARM64
// Single run of the up16x4 kernel with the channel count equal to cr (16).
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(16).kr(4).channels(16)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
}
1892 
// Runs the up16x4 kernel for channel counts that are multiples of 16:
// 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
1903 
// Runs the up16x4 kernel with qmin(128) for channel counts that are multiples
// of 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
1915 
// Runs the up16x4 kernel with qmax(128) for channel counts that are multiples
// of 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
1927 
// Runs the up16x4 kernel for channel counts below cr (16): 1 through 15.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
1938 
// Runs the up16x4 kernel for channel counts 17 through 31 (above cr = 16,
// below 2 * cr).
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
1949 
// Runs the up16x4 kernel with qmin(128) for channel counts 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
1961 
// Runs the up16x4 kernel with qmax(128) for channel counts 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
1973 
// Runs the up16x4 kernel with width(3) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).width(3)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
1985 
// Runs the up16x4 kernel with width(3) for every combination of channel count
// (1, 16, 31, 46, 61, 76) and step (2, 3, 4).
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 4; ++step_size) {
      DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).width(3).step(step_size)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
    }
  }
}
2000 
// Runs the up16x4 kernel with width(5) and output_stride(83) for channel
// counts 1, 16, 31, 46, 61, 76. channels() must receive the loop variable: a
// fixed channels(16) would repeat the identical configuration on every
// iteration. output_stride(83) is larger than the biggest channel count (76).
// NOTE(review): this file is auto-generated; mirror this change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
2013 
// Runs the up16x4 kernel with width(3) and qmin(128) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
2026 
// Runs the up16x4 kernel with width(3) and qmax(128) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
2039 
// Runs the up16x4 kernel with input_offset(304) for channel counts that are
// multiples of 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).input_offset(304)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
  }
}
2051 
// Runs the up16x4 kernel with input_offset(304) and each zero_index in 0..3
// (one per value below kr = 4), for channel counts 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).input_offset(304).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith);
    }
  }
}
2066 #endif  // XNN_ARCH_ARM64
2067 
2068 
2069 #if XNN_ARCH_ARM64
// Single run of the up16x4 acc2 kernel with the channel count equal to cr (16).
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester().cr(16).kr(4).channels(16)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
}
2078 
// Runs the up16x4 acc2 kernel for channel counts that are multiples of 16:
// 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2089 
// Runs the up16x4 acc2 kernel with qmin(128) for channel counts that are
// multiples of 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2101 
// Runs the up16x4 acc2 kernel with qmax(128) for channel counts that are
// multiples of 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2113 
// Runs the up16x4 acc2 kernel for channel counts below cr (16): 1 through 15.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 1; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2124 
// Runs the up16x4 acc2 kernel for channel counts 17 through 31 (above
// cr = 16, below 2 * cr).
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2135 
// Runs the up16x4 acc2 kernel with qmin(128) for channel counts 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2147 
// Runs the up16x4 acc2 kernel with qmax(128) for channel counts 17 through 31.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 17; num_channels < 32; ++num_channels) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2159 
// Runs the up16x4 acc2 kernel with width(3) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).width(3)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2171 
// Runs the up16x4 acc2 kernel with width(3) for every combination of channel
// count (1, 16, 31, 46, 61, 76) and step (2, 3, 4).
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    for (size_t step_size = 2; step_size <= 4; ++step_size) {
      DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).width(3).step(step_size)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
    }
  }
}
2186 
// Runs the up16x4 acc2 kernel with width(5) and output_stride(83) for channel
// counts 1, 16, 31, 46, 61, 76. channels() must receive the loop variable: a
// fixed channels(16) would repeat the identical configuration on every
// iteration. output_stride(83) is larger than the biggest channel count (76).
// NOTE(review): this file is auto-generated; mirror this change in
// tools/generate-dwconv-test.py so it survives regeneration.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(4)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2199 
// Runs the up16x4 acc2 kernel with width(3) and qmin(128) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).width(3).qmin(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2212 
// Runs the up16x4 acc2 kernel with width(3) and qmax(128) for channel counts
// 1, 16, 31, 46, 61, 76.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).width(3).qmax(128)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2225 
// Runs the up16x4 acc2 kernel with input_offset(304) for channel counts that
// are multiples of 16: 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
    DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).input_offset(304)
        .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
  }
}
2237 
// Runs the up16x4 acc2 kernel with input_offset(304) and each zero_index in
// 0..3 (one per value below kr = 4), for channel counts 32, 80, 128, 176, 224.
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
    for (uint32_t num_channels = 32; num_channels < 256; num_channels += 48) {
      DWConvMicrokernelTester().cr(16).kr(4).channels(num_channels).input_offset(304).zero_index(zero_idx)
          .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2);
    }
  }
}
2252 #endif  // XNN_ARCH_ARM64
2253