1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <gtest/gtest.h>
7
8 #include <xnnpack/common.h>
9 #include <xnnpack/isa-checks.h>
10
11 #include <xnnpack/conv.h>
12 #include "conv-hwc2chw-microkernel-tester.h"
13
14
15 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: input width 4, input height 3, 4 output channels.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3).subsampling(2).padding_width(1)
    .input_channels(3).output_channels_tile(4).output_channels(4)
    .input_width(4).input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
}
29
// Sweeps input widths 8, 20 and 32 (all multiples of 4) at input height 3
// with 4 output channels.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 8; width <= 32; width += 12) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
  }
}
45
// Sweeps input widths 1 through 3 at input height 3 with 4 output channels.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 1; width < 4; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
  }
}
61
// Sweeps input widths 5 through 7 at input height 3 with 4 output channels.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_width_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t width = 5; width < 8; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
  }
}
77
// Sweeps output channels 1 through 3 (fewer than the tile of 4) against
// input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_channels_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels < 4; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
    }
  }
}
95
// Sweeps output channels 8, 12 and 16 (multiples of the tile of 4) against
// input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_channels_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 8; channels <= 16; channels += 4) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
    }
  }
}
113
// Sweeps output channels 5 through 7 (more than one tile of 4, less than two)
// against input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_channels_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 5; channels < 8; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
    }
  }
}
131
// Sweeps input heights 1 and 2 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29. Unlike the sibling tests this one calls
// padding(1) instead of padding_width(1).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_height_lt_3) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height < 3; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding(1)  // padded input height of at least 3 required
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
151
// Sweeps input heights 4 through 9 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, input_height_gt_3) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 4; height <= 9; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
171
// Sweeps top padding 0 and 1 against output channels 1, 8, 15 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, padding_top) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t top = 0; top <= 1; ++top) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_top(top)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
192
// Sweeps bottom padding 0 and 1 against output channels 1, 8, 15 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, padding_bottom) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bottom = 0; bottom <= 1; ++bottom) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_bottom(bottom)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
213
// Sweeps output_y_start 1 through 3 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_y_start) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t y_start = 1; y_start <= 3; ++y_start) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_start(y_start)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
234
// Sweeps output_y_end 2 through 4 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, output_y_end) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t y_end = 2; y_end < 5; ++y_end) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_end(y_end)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
      }
    }
  }
}
255
// Runs with qmin(128) over output channels 1, 4, 7 and input widths
// 1, 8, 15, 22, 29 at input height 6.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmin(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
    }
  }
}
274
// Runs with qmax(128) over output channels 1, 4, 7 and input widths
// 1, 8, 15, 22, 29 at input height 6.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEON_2X2, qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmax(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2);
    }
  }
}
293 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
294
295
296 #if XNN_ARCH_ARM64
// Single configuration: input width 4, input height 3, 4 output channels.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3).subsampling(2).padding_width(1)
    .input_channels(3).output_channels_tile(4).output_channels(4)
    .input_width(4).input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
310
// Sweeps input widths 8, 20 and 32 (all multiples of 4) at input height 3
// with 4 output channels.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t width = 8; width <= 32; width += 12) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
  }
}
326
// Sweeps input widths 1 through 3 at input height 3 with 4 output channels.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t width = 1; width < 4; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
  }
}
342
// Sweeps input widths 5 through 7 at input height 3 with 4 output channels.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t width = 5; width < 8; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
  }
}
358
// Sweeps output channels 1 through 3 (fewer than the tile of 4) against
// input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t channels = 1; channels < 4; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
    }
  }
}
376
// Sweeps output channels 8, 12 and 16 (multiples of the tile of 4) against
// input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t channels = 8; channels <= 16; channels += 4) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
    }
  }
}
394
// Sweeps output channels 5 through 7 (more than one tile of 4, less than two)
// against input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t channels = 5; channels < 8; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
    }
  }
}
412
// Sweeps input heights 1 and 2 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29. Unlike the sibling tests this one calls
// padding(1) instead of padding_width(1).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_lt_3) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t height = 1; height < 3; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding(1)  // padded input height of at least 3 required
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
432
// Sweeps input heights 4 through 9 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_gt_3) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t height = 4; height <= 9; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
452
// Sweeps top padding 0 and 1 against output channels 1, 8, 15 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, padding_top) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t top = 0; top <= 1; ++top) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_top(top)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
473
// Sweeps bottom padding 0 and 1 against output channels 1, 8, 15 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, padding_bottom) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t bottom = 0; bottom <= 1; ++bottom) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_bottom(bottom)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
494
// Sweeps output_y_start 1 through 3 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_start) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t y_start = 1; y_start <= 3; ++y_start) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_start(y_start)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
515
// Sweeps output_y_end 2 through 4 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_end) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t y_end = 2; y_end < 5; ++y_end) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_end(y_end)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
      }
    }
  }
}
536
// Runs with qmin(128) over output channels 1, 4, 7 and input widths
// 1, 8, 15, 22, 29 at input height 6.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmin(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
    }
  }
}
555
// Runs with qmax(128) over output channels 1, 4, 7 and input widths
// 1, 8, 15, 22, 29 at input height 6.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmax(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
    }
  }
}
574 #endif // XNN_ARCH_ARM64
575
576 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: input width 1, input height 3, 4 output channels.
// The width previously was 4, which did not match the test name and is
// already exercised by input_width_gt_1 (widths 2 through 32).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_width_eq_1) {
  TEST_REQUIRES_X86_SSE;
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3)
    .subsampling(2)
    .padding_width(1)
    .input_channels(3)
    .output_channels_tile(4)
    .output_channels(4)
    .input_width(1)
    .input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
}
590
591
// Sweeps every input width from 2 through 32 at input height 3 with
// 4 output channels.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_width_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t width = 2; width < 33; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
  }
}
607
// Sweeps output channels 1 through 3 (fewer than the tile of 4) against
// input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_channels_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 1; channels < 4; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
    }
  }
}
625
// Sweeps output channels 8, 12 and 16 (multiples of the tile of 4) against
// input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_channels_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 8; channels <= 16; channels += 4) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
    }
  }
}
643
// Sweeps output channels 5 through 7 (more than one tile of 4, less than two)
// against input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_channels_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 5; channels < 8; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
    }
  }
}
661
// Sweeps input heights 1 and 2 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29. Unlike the sibling tests this one calls
// padding(1) instead of padding_width(1).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_height_lt_3) {
  TEST_REQUIRES_X86_SSE;
  for (size_t height = 1; height < 3; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding(1)  // padded input height of at least 3 required
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
681
// Sweeps input heights 4 through 9 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, input_height_gt_3) {
  TEST_REQUIRES_X86_SSE;
  for (size_t height = 4; height <= 9; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
701
// Sweeps top padding 0 and 1 against output channels 1, 8, 15 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, padding_top) {
  TEST_REQUIRES_X86_SSE;
  for (size_t top = 0; top <= 1; ++top) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_top(top)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
722
// Sweeps bottom padding 0 and 1 against output channels 1, 8, 15 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, padding_bottom) {
  TEST_REQUIRES_X86_SSE;
  for (size_t bottom = 0; bottom <= 1; ++bottom) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_bottom(bottom)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
743
// Sweeps output_y_start 1 through 3 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_y_start) {
  TEST_REQUIRES_X86_SSE;
  for (size_t y_start = 1; y_start <= 3; ++y_start) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_start(y_start)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
764
// Sweeps output_y_end 2 through 4 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29 at input height 9.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, output_y_end) {
  TEST_REQUIRES_X86_SSE;
  for (size_t y_end = 2; y_end < 5; ++y_end) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_end(y_end)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
      }
    }
  }
}
785
// Runs with qmin(128) over output channels 1, 4, 7 and input widths
// 1, 8, 15, 22, 29 at input height 6.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, qmin) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmin(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
    }
  }
}
804
// Runs with qmax(128) over output channels 1, 4, 7 and input widths
// 1, 8, 15, 22, 29 at input height 6.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_1X1, qmax) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmax(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1);
    }
  }
}
823 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
824
825 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: input width 1, input height 3, 4 output channels.
// The width previously was 4, which did not match the test name and is
// already exercised by input_width_gt_1 (widths 2 through 32).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_width_eq_1) {
  TEST_REQUIRES_X86_SSE;
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3)
    .subsampling(2)
    .padding_width(1)
    .input_channels(3)
    .output_channels_tile(4)
    .output_channels(4)
    .input_width(1)
    .input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
}
839
840
// Sweeps every input width from 2 through 32 at input height 3 with
// 4 output channels.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_width_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t width = 2; width < 33; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
  }
}
856
// Sweeps output channels 1 through 3 (fewer than the tile of 4) against
// input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_channels_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 1; channels < 4; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
    }
  }
}
874
// Sweeps output channels 8, 12 and 16 (multiples of the tile of 4) against
// input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_channels_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 8; channels <= 16; channels += 4) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
    }
  }
}
892
// Sweeps output channels 5 through 7 (more than one tile of 4, less than two)
// against input widths 1, 8, 15, 22 and 29 at input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_channels_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 5; channels < 8; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
    }
  }
}
910
// Sweeps input heights 1 and 2 against output channels 1, 4, 7 and
// input widths 1, 8, 15, 22, 29. Unlike the sibling tests this one calls
// padding(1) instead of padding_width(1).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_height_lt_3) {
  TEST_REQUIRES_X86_SSE;
  for (size_t height = 1; height < 3; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding(1)  // padded input height of at least 3 required
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
      }
    }
  }
}
930
// Input heights 4 through 9, for output channel counts 1, 4, 7 and input
// widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, input_height_gt_3) {
  TEST_REQUIRES_X86_SSE;
  for (size_t height = 4; height <= 9; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
      }
    }
  }
}
950
// Top padding of 0 and 1 on a 9-row input, for output channel counts 1, 8, 15
// and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, padding_top) {
  TEST_REQUIRES_X86_SSE;
  for (size_t top = 0; top <= 1; ++top) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_top(top)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
      }
    }
  }
}
971
// Bottom padding of 0 and 1 on a 9-row input, for output channel counts
// 1, 8, 15 and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, padding_bottom) {
  TEST_REQUIRES_X86_SSE;
  for (size_t bottom = 0; bottom <= 1; ++bottom) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_bottom(bottom)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
      }
    }
  }
}
992
// Passes output_y_start values 1, 2 and 3 on a 9-row input, for output
// channel counts 1, 4, 7 and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_y_start) {
  TEST_REQUIRES_X86_SSE;
  for (size_t y_start = 1; y_start <= 3; ++y_start) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_start(y_start)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
      }
    }
  }
}
1013
// Passes output_y_end values 2, 3 and 4 on a 9-row input, for output channel
// counts 1, 4, 7 and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, output_y_end) {
  TEST_REQUIRES_X86_SSE;
  for (size_t y_end = 2; y_end < 5; ++y_end) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_end(y_end)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
      }
    }
  }
}
1034
// Runs with qmin(128) on a 6-row input, for output channel counts 1, 4, 7 and
// input widths 1, 8, 15, 22, 29.
// NOTE(review): qmin presumably controls the lower output clamp - confirm
// against ConvHWC2CHWMicrokernelTester.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, qmin) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmin(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
    }
  }
}
1053
// Runs with qmax(128) on a 6-row input, for output channel counts 1, 4, 7 and
// input widths 1, 8, 15, 22, 29.
// NOTE(review): qmax presumably controls the upper output clamp - confirm
// against ConvHWC2CHWMicrokernelTester.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SSE_2X2, qmax) {
  TEST_REQUIRES_X86_SSE;
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmax(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2);
    }
  }
}
1072 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1073
1074 #if XNN_ARCH_WASMSIMD
// Single-column input: input width 1 with one full 4-channel output tile and
// input height 3. Widths 2..32 are exercised by input_width_gt_1, so this test
// must use width 1 to match its name and to avoid duplicating that coverage.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_width_eq_1) {
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3)
    .subsampling(2)
    .padding_width(1)
    .input_channels(3)
    .output_channels_tile(4)
    .output_channels(4)
    .input_width(1)
    .input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
1087
1088
// Every input width from 2 through 32 with one full 4-channel output tile and
// input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_width_gt_1) {
  for (size_t width = 2; width < 33; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
            ConvHWC2CHWMicrokernelTester::Variant::Scalar);
  }
}
1103
// Output channel counts 1, 2 and 3 (less than one 4-channel tile), each run
// for input widths 1, 8, 15, 22 and 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_channels_lt_4) {
  for (size_t channels = 1; channels < 4; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
              ConvHWC2CHWMicrokernelTester::Variant::Scalar);
    }
  }
}
1120
// Output channel counts that are whole multiples of the 4-channel tile
// (8, 12, 16), each run for input widths 1, 8, 15, 22 and 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_channels_div_4) {
  for (size_t channels = 8; channels <= 16; channels += 4) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
              ConvHWC2CHWMicrokernelTester::Variant::Scalar);
    }
  }
}
1137
// Output channel counts 5, 6 and 7 (more than one 4-channel tile, fewer than
// two), each run for input widths 1, 8, 15, 22 and 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_channels_gt_4) {
  for (size_t channels = 5; channels < 8; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
              ConvHWC2CHWMicrokernelTester::Variant::Scalar);
    }
  }
}
1154
// Input heights 1 and 2, for output channel counts 1, 4, 7 and input widths
// 1, 8, 15, 22, 29. Unlike the other tests this one calls padding(1) instead
// of padding_width(1).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_height_lt_3) {
  for (size_t height = 1; height < 3; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding(1)  // padded input height of at least 3 required
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1173
// Input heights 4 through 9, for output channel counts 1, 4, 7 and input
// widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, input_height_gt_3) {
  for (size_t height = 4; height <= 9; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1192
// Top padding of 0 and 1 on a 9-row input, for output channel counts 1, 8, 15
// and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, padding_top) {
  for (size_t top = 0; top <= 1; ++top) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_top(top)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1212
// Bottom padding of 0 and 1 on a 9-row input, for output channel counts
// 1, 8, 15 and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, padding_bottom) {
  for (size_t bottom = 0; bottom <= 1; ++bottom) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_bottom(bottom)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1232
// Passes output_y_start values 1, 2 and 3 on a 9-row input, for output
// channel counts 1, 4, 7 and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_y_start) {
  for (size_t y_start = 1; y_start <= 3; ++y_start) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_start(y_start)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1252
// Passes output_y_end values 2, 3 and 4 on a 9-row input, for output channel
// counts 1, 4, 7 and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, output_y_end) {
  for (size_t y_end = 2; y_end < 5; ++y_end) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_end(y_end)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1272
// Runs with qmin(128) on a 6-row input, for output channel counts 1, 4, 7 and
// input widths 1, 8, 15, 22, 29.
// NOTE(review): qmin presumably controls the lower output clamp - confirm
// against ConvHWC2CHWMicrokernelTester.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, qmin) {
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmin(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
              ConvHWC2CHWMicrokernelTester::Variant::Scalar);
    }
  }
}
1290
// Runs with qmax(128) on a 6-row input, for output channel counts 1, 4, 7 and
// input widths 1, 8, 15, 22, 29.
// NOTE(review): qmax presumably controls the upper output clamp - confirm
// against ConvHWC2CHWMicrokernelTester.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__WASMSIMD_2X2, qmax) {
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmax(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2,
              ConvHWC2CHWMicrokernelTester::Variant::Scalar);
    }
  }
}
1308 #endif // XNN_ARCH_WASMSIMD
1309
// Single-column input: input width 1 with one full 4-channel output tile and
// input height 3. Widths 2..32 are exercised by input_width_gt_1, so this test
// must use width 1 to match its name and to avoid duplicating that coverage.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_width_eq_1) {
  ConvHWC2CHWMicrokernelTester()
    .kernel_size(3)
    .subsampling(2)
    .padding_width(1)
    .input_channels(3)
    .output_channels_tile(4)
    .output_channels(4)
    .input_width(1)
    .input_height(3)
    .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
1322
1323
// Every input width from 2 through 32 with one full 4-channel output tile and
// input height 3.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_width_gt_1) {
  for (size_t width = 2; width < 33; ++width) {
    ConvHWC2CHWMicrokernelTester()
      .kernel_size(3).subsampling(2).padding_width(1)
      .input_channels(3).output_channels_tile(4).output_channels(4)
      .input_width(width).input_height(3)
      .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
            ConvHWC2CHWMicrokernelTester::Variant::Scalar);
  }
}
1338
// Output channel counts 1, 2 and 3 (less than one 4-channel tile), each run
// for input widths 1, 8, 15, 22 and 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_lt_4) {
  for (size_t channels = 1; channels < 4; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
              ConvHWC2CHWMicrokernelTester::Variant::Scalar);
    }
  }
}
1355
// Output channel counts that are whole multiples of the 4-channel tile
// (8, 12, 16), each run for input widths 1, 8, 15, 22 and 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_div_4) {
  for (size_t channels = 8; channels <= 16; channels += 4) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
              ConvHWC2CHWMicrokernelTester::Variant::Scalar);
    }
  }
}
1372
// Output channel counts 5, 6 and 7 (more than one 4-channel tile, fewer than
// two), each run for input widths 1, 8, 15, 22 and 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_gt_4) {
  for (size_t channels = 5; channels < 8; ++channels) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(3)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
              ConvHWC2CHWMicrokernelTester::Variant::Scalar);
    }
  }
}
1389
// Input heights 1 and 2, for output channel counts 1, 4, 7 and input widths
// 1, 8, 15, 22, 29. Unlike the other tests this one calls padding(1) instead
// of padding_width(1).
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_height_lt_3) {
  for (size_t height = 1; height < 3; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding(1)  // padded input height of at least 3 required
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1408
// Input heights 4 through 9, for output channel counts 1, 4, 7 and input
// widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_height_gt_3) {
  for (size_t height = 4; height <= 9; ++height) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(height)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1427
// Top padding of 0 and 1 on a 9-row input, for output channel counts 1, 8, 15
// and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, padding_top) {
  for (size_t top = 0; top <= 1; ++top) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_top(top)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1447
// Bottom padding of 0 and 1 on a 9-row input, for output channel counts
// 1, 8, 15 and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, padding_bottom) {
  for (size_t bottom = 0; bottom <= 1; ++bottom) {
    for (size_t channels = 1; channels < 16; channels += 7) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2)
          .padding_width(1).padding_bottom(bottom)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1467
// Passes output_y_start values 1, 2 and 3 on a 9-row input, for output
// channel counts 1, 4, 7 and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_y_start) {
  for (size_t y_start = 1; y_start <= 3; ++y_start) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_start(y_start)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1487
// Passes output_y_end values 2, 3 and 4 on a 9-row input, for output channel
// counts 1, 4, 7 and input widths 1, 8, 15, 22, 29.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_y_end) {
  for (size_t y_end = 2; y_end < 5; ++y_end) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (size_t width = 1; width < 32; width += 7) {
        ConvHWC2CHWMicrokernelTester()
          .kernel_size(3).subsampling(2).padding_width(1)
          .input_channels(3).output_channels_tile(4).output_channels(channels)
          .input_width(width).input_height(9)
          .output_y_end(y_end)
          .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
                ConvHWC2CHWMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1507
// Runs with qmin(128) on a 6-row input, for output channel counts 1, 4, 7 and
// input widths 1, 8, 15, 22, 29.
// NOTE(review): qmin presumably controls the lower output clamp - confirm
// against ConvHWC2CHWMicrokernelTester.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, qmin) {
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmin(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
              ConvHWC2CHWMicrokernelTester::Variant::Scalar);
    }
  }
}
1525
// Runs with qmax(128) on a 6-row input, for output channel counts 1, 4, 7 and
// input widths 1, 8, 15, 22, 29.
// NOTE(review): qmax presumably controls the upper output clamp - confirm
// against ConvHWC2CHWMicrokernelTester.
TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, qmax) {
  for (size_t channels = 1; channels < 8; channels += 3) {
    for (size_t width = 1; width < 32; width += 7) {
      ConvHWC2CHWMicrokernelTester()
        .kernel_size(3).subsampling(2).padding_width(1)
        .input_channels(3).output_channels_tile(4).output_channels(channels)
        .input_width(width).input_height(6)
        .qmax(128)
        .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
              ConvHWC2CHWMicrokernelTester::Variant::Scalar);
    }
  }
}
1543