• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <gtest/gtest.h>
7 
8 #include <xnnpack/common.h>
9 #include <xnnpack/isa-checks.h>
10 
11 #include <xnnpack/conv.h>
12 #include "conv-hwc2chw-microkernel-tester.h"
13 
14 
15 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case: one run with input width 4, input height 3 and 4 output
// channels (the same value that is passed as the output channel tile).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3).subsampling(2).padding_width(1)
    .input_channels(3).output_channels_tile(4).output_channels(4)
    .input_width(4).input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
}
29 
// Input widths 8, 20 and 32 (all multiples of 4), with 4 output channels and
// input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 8; width <= 32; width += 12) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
  }
}
45 
// Input widths 1 through 3 (below 4), with 4 output channels and input
// height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 1; width < 4; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
  }
}
61 
// Input widths 5 through 7 (above 4, below 8), with 4 output channels and
// input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
  }
}
77 
// Output channel counts 1 through 3 (fewer than the tile of 4), crossed with
// input widths 1, 8, 15, 22 and 29; input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_channels_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels < 4; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
    }
  }
}
95 
// Output channel counts 8, 12 and 16 (multiples of the tile of 4), crossed
// with input widths 1, 8, 15, 22 and 29; input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_channels_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 8; channels <= 16; channels += 4) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
    }
  }
}
113 
// Output channel counts 5 through 7 (more than one tile of 4, less than two),
// crossed with input widths 1, 8, 15, 22 and 29; input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_channels_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 5; channels < 8; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
    }
  }
}
131 
// Input heights 1 and 2 (below 3), crossed with output channel counts 1, 4, 7
// and input widths 1, 8, 15, 22 and 29. Unlike the sibling tests, this one
// calls padding(1) rather than padding_width(1).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_height_lt_3) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height < 3; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding(1)  // padded input height of at least 3 required
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
151 
// Input heights 4 through 9 (above 3), crossed with output channel counts
// 1, 4, 7 and input widths 1, 8, 15, 22 and 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_height_gt_3) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 4; height <= 9; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
171 
// Top padding values 0 and 1, crossed with output channel counts 1, 8, 15 and
// input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, padding_top) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t top = 0; top <= 1; ++top) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_top(top)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
192 
// Bottom padding values 0 and 1, crossed with output channel counts 1, 8, 15
// and input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, padding_bottom) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bottom = 0; bottom <= 1; ++bottom) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_bottom(bottom)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
213 
// output_y_start values 1 through 3, crossed with output channel counts
// 1, 4, 7 and input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_y_start) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t y_start = 1; y_start <= 3; ++y_start) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_start(y_start)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
234 
// output_y_end values 2 through 4, crossed with output channel counts 1, 4, 7
// and input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_y_end) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t y_end = 2; y_end < 5; ++y_end) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_end(y_end)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
255 
// Runs with qmin(128) set on the tester, over output channel counts 1, 4, 7
// and input widths 1, 8, 15, 22 and 29; input height 6.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmin(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
    }
  }
}
274 
// Runs with qmax(128) set on the tester, over output channel counts 1, 4, 7
// and input widths 1, 8, 15, 22 and 29; input height 6.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmax(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
    }
  }
}
293 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
294 
295 
296 #if XNN_ARCH_ARM64
// Baseline case: one run with input width 4, input height 3 and 4 output
// channels (the same value that is passed as the output channel tile).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3).subsampling(2).padding_width(1)
    .input_channels(3).output_channels_tile(4).output_channels(4)
    .input_width(4).input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
310 
// Input widths 8, 20 and 32 (all multiples of 4), with 4 output channels and
// input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t width = 8; width <= 32; width += 12) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
  }
}
326 
// Input widths 1 through 3 (below 4), with 4 output channels and input
// height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t width = 1; width < 4; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
  }
}
342 
// Input widths 5 through 7 (above 4, below 8), with 4 output channels and
// input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t width = 5; width < 8; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
  }
}
358 
// Output channel counts 1 through 3 (fewer than the tile of 4), crossed with
// input widths 1, 8, 15, 22 and 29; input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t channels = 1; channels < 4; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
    }
  }
}
376 
// Output channel counts 8, 12 and 16 (multiples of the tile of 4), crossed
// with input widths 1, 8, 15, 22 and 29; input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t channels = 8; channels <= 16; channels += 4) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
    }
  }
}
394 
// Output channel counts 5 through 7 (more than one tile of 4, less than two),
// crossed with input widths 1, 8, 15, 22 and 29; input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t channels = 5; channels < 8; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
    }
  }
}
412 
// Input heights 1 and 2 (below 3), crossed with output channel counts 1, 4, 7
// and input widths 1, 8, 15, 22 and 29. Unlike the sibling tests, this one
// calls padding(1) rather than padding_width(1).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_lt_3) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t height = 1; height < 3; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding(1)  // padded input height of at least 3 required
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
432 
// Input heights 4 through 9 (above 3), crossed with output channel counts
// 1, 4, 7 and input widths 1, 8, 15, 22 and 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_gt_3) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t height = 4; height <= 9; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
452 
// Top padding values 0 and 1, crossed with output channel counts 1, 8, 15 and
// input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, padding_top) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t top = 0; top <= 1; ++top) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_top(top)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
473 
// Bottom padding values 0 and 1, crossed with output channel counts 1, 8, 15
// and input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, padding_bottom) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t bottom = 0; bottom <= 1; ++bottom) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_bottom(bottom)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
494 
// output_y_start values 1 through 3, crossed with output channel counts
// 1, 4, 7 and input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_start) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t y_start = 1; y_start <= 3; ++y_start) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_start(y_start)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
515 
// output_y_end values 2 through 4, crossed with output channel counts 1, 4, 7
// and input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_end) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t y_end = 2; y_end < 5; ++y_end) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_end(y_end)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
536 
// Runs with qmin(128) set on the tester, over output channel counts 1, 4, 7
// and input widths 1, 8, 15, 22 and 29; input height 6.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmin(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
    }
  }
}
555 
// Runs with qmax(128) set on the tester, over output channel counts 1, 4, 7
// and input widths 1, 8, 15, 22 and 29; input height 6.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmax(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
    }
  }
}
574 #endif  // XNN_ARCH_ARM64
575 
576 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with input width 1, input height 3 and 4 output channels (the
// same value that is passed as the output channel tile).
//
// The width passed to the tester must be 1 to match the test name:
// input_width_gt_1 starts at width 2, so this test is the one that covers
// width 1 in this configuration.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_width_eq_1) {
  TEST_REQUIRES_X86_SSE;
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3)
    .subsampling(2)
    .padding_width(1)
    .input_channels(3)
    .output_channels_tile(4)
    .output_channels(4)
    .input_width(1)
    .input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
}
590 
591 
// Every input width from 2 through 32, with 4 output channels and input
// height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_width_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t width = 2; width < 33; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
  }
}
607 
// Output channel counts 1 through 3 (fewer than the tile of 4), crossed with
// input widths 1, 8, 15, 22 and 29; input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_channels_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 1; channels < 4; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
    }
  }
}
625 
// Output channel counts 8, 12 and 16 (multiples of the tile of 4), crossed
// with input widths 1, 8, 15, 22 and 29; input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_channels_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 8; channels <= 16; channels += 4) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
    }
  }
}
643 
// Output channel counts 5 through 7 (more than one tile of 4, less than two),
// crossed with input widths 1, 8, 15, 22 and 29; input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_channels_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 5; channels < 8; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
    }
  }
}
661 
// Input heights 1 and 2 (below 3), crossed with output channel counts 1, 4, 7
// and input widths 1, 8, 15, 22 and 29. Unlike the sibling tests, this one
// calls padding(1) rather than padding_width(1).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_height_lt_3) {
  TEST_REQUIRES_X86_SSE;
  for (size_t height = 1; height < 3; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding(1)  // padded input height of at least 3 required
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
681 
// Input heights 4 through 9 (above 3), crossed with output channel counts
// 1, 4, 7 and input widths 1, 8, 15, 22 and 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_height_gt_3) {
  TEST_REQUIRES_X86_SSE;
  for (size_t height = 4; height <= 9; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
701 
// Top padding values 0 and 1, crossed with output channel counts 1, 8, 15 and
// input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, padding_top) {
  TEST_REQUIRES_X86_SSE;
  for (size_t top = 0; top <= 1; ++top) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_top(top)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
722 
// Bottom padding values 0 and 1, crossed with output channel counts 1, 8, 15
// and input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, padding_bottom) {
  TEST_REQUIRES_X86_SSE;
  for (size_t bottom = 0; bottom <= 1; ++bottom) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_bottom(bottom)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
743 
// output_y_start values 1 through 3, crossed with output channel counts
// 1, 4, 7 and input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_y_start) {
  TEST_REQUIRES_X86_SSE;
  for (size_t y_start = 1; y_start <= 3; ++y_start) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_start(y_start)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
764 
// output_y_end values 2 through 4, crossed with output channel counts 1, 4, 7
// and input widths 1, 8, 15, 22 and 29; input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_y_end) {
  TEST_REQUIRES_X86_SSE;
  for (size_t y_end = 2; y_end < 5; ++y_end) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_end(y_end)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
785 
// Runs the sse_1x1 micro-kernel with the tester's qmin set to 128 on a 6-row
// input, over a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, qmin) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1;
  for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(6)
        .qmin(128)
        .Test(ukernel);
    }
  }
}
804 
// Runs the sse_1x1 micro-kernel with the tester's qmax set to 128 on a 6-row
// input, over a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, qmax) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1;
  for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(6)
        .qmax(128)
        .Test(ukernel);
    }
  }
}
823 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
824 
825 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Checks the sse_2x2 micro-kernel on the narrowest input: a single input
// column, exactly one output-channel tile (4 channels), and a 3-row input.
//
// This test previously set input_width(4), which contradicted its name and
// merely repeated one iteration of input_width_gt_1 (widths 2..32 with
// otherwise identical settings), so width 1 with 4 output channels was never
// exercised.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_width_eq_1) {
  TEST_REQUIRES_X86_SSE;
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3)
    .subsampling(2)
    .padding_width(1)
    .input_channels(3)
    .output_channels_tile(4)
    .output_channels(4)
    .input_width(1)  // matches the "eq_1" in the test name
    .input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
}
839 
840 
// Runs the sse_2x2 micro-kernel for every input width from 2 through 32 with
// exactly one output-channel tile (4 channels) on a 3-row input.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_width_gt_1) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t width = 2; width <= 32; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3)
      .subsampling(2)
      .padding_width(1)
      .input_channels(3)
      .output_channels_tile(4)
      .output_channels(4)
      .input_width(width)
      .input_height(3)
      .Test(ukernel);
  }
}
856 
// Runs the sse_2x2 micro-kernel with fewer output channels (1..3) than the
// 4-channel tile, across a sparse set of input widths on a 3-row input.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_channels_lt_4) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t channels = 1; channels <= 3; ++channels) {
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(3)
        .Test(ukernel);
    }
  }
}
874 
// Runs the sse_2x2 micro-kernel with output channel counts that are multiples
// of the 4-channel tile (8, 12, 16), across a sparse set of input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_channels_div_4) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t channels = 8; channels < 17; channels += 4) {  // 8, 12, 16
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(3)
        .Test(ukernel);
    }
  }
}
892 
// Runs the sse_2x2 micro-kernel with output channel counts just above one
// 4-channel tile (5..7), across a sparse set of input widths on a 3-row input.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_channels_gt_4) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t channels = 5; channels <= 7; ++channels) {
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(3)
        .Test(ukernel);
    }
  }
}
910 
// Runs the sse_2x2 micro-kernel on inputs shorter than the 3-row kernel
// (heights 1 and 2), over a sparse grid of channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_height_lt_3) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          // padding(1) rather than padding_width(1): a padded input height of
          // at least 3 is required.
          .padding(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(height)
          .Test(ukernel);
      }
    }
  }
}
930 
// Runs the sse_2x2 micro-kernel on inputs taller than the 3-row kernel
// (heights 4..9), over a sparse grid of channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_height_gt_3) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t height = 4; height < 10; ++height) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(height)
          .Test(ukernel);
      }
    }
  }
}
950 
// Sweeps top padding over {0, 1} for the sse_2x2 micro-kernel on a 9-row
// input, combined with a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, padding_top) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t pad = 0; pad < 2; ++pad) {
    for (size_t channels = 1; channels <= 15; channels += 7) {  // 1, 8, 15
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .padding_top(pad)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .Test(ukernel);
      }
    }
  }
}
971 
// Sweeps bottom padding over {0, 1} for the sse_2x2 micro-kernel on a 9-row
// input, combined with a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, padding_bottom) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t pad = 0; pad < 2; ++pad) {
    for (size_t channels = 1; channels <= 15; channels += 7) {  // 1, 8, 15
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .padding_bottom(pad)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .Test(ukernel);
      }
    }
  }
}
992 
// Runs the sse_2x2 micro-kernel with the tester's output_y_start set to
// 1, 2 and 3 on a 9-row input, over a sparse grid of channel counts and widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_y_start) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t y_start = 1; y_start < 4; ++y_start) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .output_y_start(y_start)
          .Test(ukernel);
      }
    }
  }
}
1013 
// Runs the sse_2x2 micro-kernel with the tester's output_y_end set to
// 2, 3 and 4 on a 9-row input, over a sparse grid of channel counts and widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_y_end) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t y_end = 2; y_end <= 4; ++y_end) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .output_y_end(y_end)
          .Test(ukernel);
      }
    }
  }
}
1034 
// Runs the sse_2x2 micro-kernel with the tester's qmin set to 128 on a 6-row
// input, over a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, qmin) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(6)
        .qmin(128)
        .Test(ukernel);
    }
  }
}
1053 
// Runs the sse_2x2 micro-kernel with the tester's qmax set to 128 on a 6-row
// input, over a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, qmax) {
  TEST_REQUIRES_X86_SSE;
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2;
  for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(6)
        .qmax(128)
        .Test(ukernel);
    }
  }
}
1072 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1073 
1074 #if XNN_ARCH_WASMSIMD
// Checks the wasmsimd_2x2 micro-kernel on the narrowest input: a single input
// column, exactly one output-channel tile (4 channels), and a 3-row input.
//
// This test previously set input_width(4), which contradicted its name and
// merely repeated one iteration of input_width_gt_1 (widths 2..32 with
// otherwise identical settings), so width 1 with 4 output channels was never
// exercised.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_width_eq_1) {
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3)
    .subsampling(2)
    .padding_width(1)
    .input_channels(3)
    .output_channels_tile(4)
    .output_channels(4)
    .input_width(1)  // matches the "eq_1" in the test name
    .input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
1087 
1088 
// Runs the wasmsimd_2x2 micro-kernel for every input width from 2 through 32
// with exactly one output-channel tile (4 channels) on a 3-row input.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_width_gt_1) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t width = 2; width <= 32; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3)
      .subsampling(2)
      .padding_width(1)
      .input_channels(3)
      .output_channels_tile(4)
      .output_channels(4)
      .input_width(width)
      .input_height(3)
      .Test(ukernel, variant);
  }
}
1103 
// Runs the wasmsimd_2x2 micro-kernel with fewer output channels (1..3) than
// the 4-channel tile, across a sparse set of input widths on a 3-row input.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_channels_lt_4) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t channels = 1; channels <= 3; ++channels) {
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(3)
        .Test(ukernel, variant);
    }
  }
}
1120 
// Runs the wasmsimd_2x2 micro-kernel with output channel counts that are
// multiples of the 4-channel tile (8, 12, 16), across a sparse set of widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_channels_div_4) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t channels = 8; channels < 17; channels += 4) {  // 8, 12, 16
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(3)
        .Test(ukernel, variant);
    }
  }
}
1137 
// Runs the wasmsimd_2x2 micro-kernel with output channel counts just above one
// 4-channel tile (5..7), across a sparse set of input widths on a 3-row input.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_channels_gt_4) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t channels = 5; channels <= 7; ++channels) {
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(3)
        .Test(ukernel, variant);
    }
  }
}
1154 
// Runs the wasmsimd_2x2 micro-kernel on inputs shorter than the 3-row kernel
// (heights 1 and 2), over a sparse grid of channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_height_lt_3) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          // padding(1) rather than padding_width(1): a padded input height of
          // at least 3 is required.
          .padding(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(height)
          .Test(ukernel, variant);
      }
    }
  }
}
1173 
// Runs the wasmsimd_2x2 micro-kernel on inputs taller than the 3-row kernel
// (heights 4..9), over a sparse grid of channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_height_gt_3) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t height = 4; height < 10; ++height) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(height)
          .Test(ukernel, variant);
      }
    }
  }
}
1192 
// Sweeps top padding over {0, 1} for the wasmsimd_2x2 micro-kernel on a 9-row
// input, combined with a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, padding_top) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t pad = 0; pad < 2; ++pad) {
    for (size_t channels = 1; channels <= 15; channels += 7) {  // 1, 8, 15
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .padding_top(pad)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .Test(ukernel, variant);
      }
    }
  }
}
1212 
// Sweeps bottom padding over {0, 1} for the wasmsimd_2x2 micro-kernel on a
// 9-row input, combined with a sparse grid of channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, padding_bottom) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t pad = 0; pad < 2; ++pad) {
    for (size_t channels = 1; channels <= 15; channels += 7) {  // 1, 8, 15
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .padding_bottom(pad)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .Test(ukernel, variant);
      }
    }
  }
}
1232 
// Runs the wasmsimd_2x2 micro-kernel with the tester's output_y_start set to
// 1, 2 and 3 on a 9-row input, over a sparse grid of channel counts and widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_y_start) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t y_start = 1; y_start < 4; ++y_start) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .output_y_start(y_start)
          .Test(ukernel, variant);
      }
    }
  }
}
1252 
// Runs the wasmsimd_2x2 micro-kernel with the tester's output_y_end set to
// 2, 3 and 4 on a 9-row input, over a sparse grid of channel counts and widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_y_end) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t y_end = 2; y_end <= 4; ++y_end) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .output_y_end(y_end)
          .Test(ukernel, variant);
      }
    }
  }
}
1272 
// Runs the wasmsimd_2x2 micro-kernel with the tester's qmin set to 128 on a
// 6-row input, over a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, qmin) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(6)
        .qmin(128)
        .Test(ukernel, variant);
    }
  }
}
1290 
// Runs the wasmsimd_2x2 micro-kernel with the tester's qmax set to 128 on a
// 6-row input, over a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, qmax) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(6)
        .qmax(128)
        .Test(ukernel, variant);
    }
  }
}
1308 #endif  // XNN_ARCH_WASMSIMD
1309 
// Checks the scalar_1x1 micro-kernel on the narrowest input: a single input
// column, exactly one output-channel tile (4 channels), and a 3-row input.
//
// This test previously set input_width(4), which contradicted its name and
// merely repeated one iteration of input_width_gt_1 (widths 2..32 with
// otherwise identical settings), so width 1 with 4 output channels was never
// exercised.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_width_eq_1) {
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3)
    .subsampling(2)
    .padding_width(1)
    .input_channels(3)
    .output_channels_tile(4)
    .output_channels(4)
    .input_width(1)  // matches the "eq_1" in the test name
    .input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
1322 
1323 
// Runs the scalar_1x1 micro-kernel for every input width from 2 through 32
// with exactly one output-channel tile (4 channels) on a 3-row input.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_width_gt_1) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t width = 2; width <= 32; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3)
      .subsampling(2)
      .padding_width(1)
      .input_channels(3)
      .output_channels_tile(4)
      .output_channels(4)
      .input_width(width)
      .input_height(3)
      .Test(ukernel, variant);
  }
}
1338 
// Runs the scalar_1x1 micro-kernel with fewer output channels (1..3) than the
// 4-channel tile, across a sparse set of input widths on a 3-row input.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_lt_4) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t channels = 1; channels <= 3; ++channels) {
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(3)
        .Test(ukernel, variant);
    }
  }
}
1355 
// Runs the scalar_1x1 micro-kernel with output channel counts that are
// multiples of the 4-channel tile (8, 12, 16), across a sparse set of widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_div_4) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t channels = 8; channels < 17; channels += 4) {  // 8, 12, 16
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(3)
        .Test(ukernel, variant);
    }
  }
}
1372 
// Runs the scalar_1x1 micro-kernel with output channel counts just above one
// 4-channel tile (5..7), across a sparse set of input widths on a 3-row input.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_gt_4) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t channels = 5; channels <= 7; ++channels) {
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(3)
        .Test(ukernel, variant);
    }
  }
}
1389 
// Runs the scalar_1x1 micro-kernel on inputs shorter than the 3-row kernel
// (heights 1 and 2), over a sparse grid of channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_height_lt_3) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          // padding(1) rather than padding_width(1): a padded input height of
          // at least 3 is required.
          .padding(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(height)
          .Test(ukernel, variant);
      }
    }
  }
}
1408 
// Runs the scalar_1x1 micro-kernel on inputs taller than the 3-row kernel
// (heights 4..9), over a sparse grid of channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_height_gt_3) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t height = 4; height < 10; ++height) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(height)
          .Test(ukernel, variant);
      }
    }
  }
}
1427 
// Sweeps top padding over {0, 1} for the scalar_1x1 micro-kernel on a 9-row
// input, combined with a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, padding_top) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t pad = 0; pad < 2; ++pad) {
    for (size_t channels = 1; channels <= 15; channels += 7) {  // 1, 8, 15
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .padding_top(pad)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .Test(ukernel, variant);
      }
    }
  }
}
1447 
// Sweeps bottom padding over {0, 1} for the scalar_1x1 micro-kernel on a 9-row
// input, combined with a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, padding_bottom) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t pad = 0; pad < 2; ++pad) {
    for (size_t channels = 1; channels <= 15; channels += 7) {  // 1, 8, 15
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .padding_bottom(pad)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .Test(ukernel, variant);
      }
    }
  }
}
1467 
// Runs the scalar_1x1 micro-kernel with the tester's output_y_start set to
// 1, 2 and 3 on a 9-row input, over a sparse grid of channel counts and widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_y_start) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t y_start = 1; y_start < 4; ++y_start) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .output_y_start(y_start)
          .Test(ukernel, variant);
      }
    }
  }
}
1487 
// Runs the scalar_1x1 micro-kernel with the tester's output_y_end set to
// 2, 3 and 4 on a 9-row input, over a sparse grid of channel counts and widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_y_end) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t y_end = 2; y_end <= 4; ++y_end) {
    for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
      for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3)
          .subsampling(2)
          .padding_width(1)
          .input_channels(3)
          .output_channels_tile(4)
          .output_channels(channels)
          .input_width(width)
          .input_height(9)
          .output_y_end(y_end)
          .Test(ukernel, variant);
      }
    }
  }
}
1507 
// Runs the scalar_1x1 micro-kernel with the tester's qmin set to 128 on a
// 6-row input, over a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, qmin) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(6)
        .qmin(128)
        .Test(ukernel, variant);
    }
  }
}
1525 
// Runs the scalar_1x1 micro-kernel with the tester's qmax set to 128 on a
// 6-row input, over a sparse grid of output channel counts and input widths.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, qmax) {
  const auto ukernel = xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1;
  const auto variant = ConvHWC2CHWMicrokernelTester::Variant::Scalar;
  for (size_t channels = 1; channels <= 7; channels += 3) {  // 1, 4, 7
    for (size_t width = 1; width <= 29; width += 7) {  // 1, 8, 15, 22, 29
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3)
        .subsampling(2)
        .padding_width(1)
        .input_channels(3)
        .output_channels_tile(4)
        .output_channels(channels)
        .input_width(width)
        .input_height(6)
        .qmax(128)
        .Test(ukernel, variant);
    }
  }
}
1543