• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/f32-argmaxpool.yaml
8 //   Generator: tools/generate-argmaxpool-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/argmaxpool.h>
17 #include "argmaxpool-microkernel-tester.h"
18 
19 
20 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_f32_argmaxpool_ukernel_4x__neon_c4 with pooling_elements == pooling_tile == 4 and 4 channels.
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_eq_4_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(4).Test(ukernel);
}
29 
// Same configuration as the 4-channel full-tile case, plus input_offset(7).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_eq_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(4).pooling_tile(4)
      .channels(4).input_offset(7)
      .Test(ukernel);
}
39 
// 4 channels with pooling_elements of 2 and 3 (fewer than the pooling_tile of 4).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_eq_4_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t window = 2; window <= 3; ++window) {
    ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(4).Test(ukernel);
  }
}
50 
// 4 channels, pooling_elements of 2 and 3, with input_offset(7).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_eq_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t window = 2; window <= 3; ++window) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(window).pooling_tile(4)
        .channels(4).input_offset(7)
        .Test(ukernel);
  }
}
62 
// Channel counts 8, 12, ..., 28 (multiples of 4) with pooling_elements == pooling_tile == 4.
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_div_4_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t nc = 8; nc <= 28; nc += 4) {
    ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(nc).Test(ukernel);
  }
}
73 
// Channel counts 8, 12, ..., 28 with a full pooling tile and input_offset(37).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_div_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t nc = 8; nc <= 28; nc += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(nc).input_offset(37)
        .Test(ukernel);
  }
}
85 
// pooling_elements of 2 and 3 crossed with channel counts 8, 12, ..., 28.
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_div_4_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 8; nc <= 28; nc += 4) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(nc).Test(ukernel);
    }
  }
}
98 
// pooling_elements of 2 and 3 crossed with channel counts 8, 12, ..., 28, using input_offset(37).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_div_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 8; nc <= 28; nc += 4) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(4)
          .channels(nc).input_offset(37)
          .Test(ukernel);
    }
  }
}
112 
// Channel counts 1 through 3 with pooling_elements == pooling_tile == 4.
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_lt_4_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t nc = 1; nc <= 3; ++nc) {
    ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(nc).Test(ukernel);
  }
}
123 
// Channel counts 1 through 3 with a full pooling tile and input_offset(5).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_lt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t nc = 1; nc <= 3; ++nc) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(nc).input_offset(5)
        .Test(ukernel);
  }
}
135 
// pooling_elements of 2 and 3 crossed with channel counts 1 through 3.
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_lt_4_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 1; nc <= 3; ++nc) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(nc).Test(ukernel);
    }
  }
}
148 
// pooling_elements of 2 and 3 crossed with channel counts 1 through 3, using input_offset(5).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_lt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 1; nc <= 3; ++nc) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(4)
          .channels(nc).input_offset(5)
          .Test(ukernel);
    }
  }
}
162 
// Channel counts 5 through 7 with pooling_elements == pooling_tile == 4.
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_gt_4_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t nc = 5; nc <= 7; ++nc) {
    ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(nc).Test(ukernel);
  }
}
173 
// Channel counts 5 through 7 with a full pooling tile and input_offset(11).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_gt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t nc = 5; nc <= 7; ++nc) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(nc).input_offset(11)
        .Test(ukernel);
  }
}
185 
// pooling_elements of 2 and 3 crossed with channel counts 5 through 7.
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_gt_4_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 5; nc <= 7; ++nc) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(nc).Test(ukernel);
    }
  }
}
198 
// pooling_elements of 2 and 3 crossed with channel counts 5 through 7, using input_offset(11).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, channels_gt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 5; nc <= 7; ++nc) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(4)
          .channels(nc).input_offset(11)
          .Test(ukernel);
    }
  }
}
212 
// Sweeps output_pixels 2..5, pooling_elements 2..4 and channels 1, 4, ..., 19.
TEST(F32_ARGMAXPOOL_4X__NEON_C4, few_output_pixels) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t window = 2; window < 5; ++window) {
      for (size_t nc = 1; nc < 21; nc += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels).pooling_elements(window)
            .pooling_tile(4).channels(nc)
            .Test(ukernel);
      }
    }
  }
}
228 
// Sweeps output_pixels 2..5, pooling_elements 2..4 and channels 1, 4, ..., 19 with input_offset(23).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t window = 2; window < 5; ++window) {
      for (size_t nc = 1; nc < 21; nc += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels).pooling_elements(window)
            .pooling_tile(4).channels(nc)
            .input_offset(23)
            .Test(ukernel);
      }
    }
  }
}
245 
// Sweeps output_pixels 2..5, pooling_elements 2..4 and channels 1, 4, ..., 19 with output_stride(23).
TEST(F32_ARGMAXPOOL_4X__NEON_C4, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t window = 2; window < 5; ++window) {
      for (size_t nc = 1; nc < 21; nc += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels).pooling_elements(window)
            .pooling_tile(4).channels(nc)
            .output_stride(23)
            .Test(ukernel);
      }
    }
  }
}
262 
// Same sweep as the output-stride case, additionally varying step from 2 up to pooling_elements.
TEST(F32_ARGMAXPOOL_4X__NEON_C4, few_output_pixels_with_step) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__neon_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t window = 2; window < 5; ++window) {
      for (size_t nc = 1; nc < 21; nc += 3) {
        for (size_t step_size = 2; step_size <= window; ++step_size) {
          ArgMaxPoolMicrokernelTester()
              .output_pixels(pixels).pooling_elements(window)
              .pooling_tile(4).step(step_size)
              .channels(nc).output_stride(23)
              .Test(ukernel);
        }
      }
    }
  }
}
282 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
283 
284 
285 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs xnn_f32_argmaxpool_ukernel_4x__sse2_c4 with pooling_elements == pooling_tile == 4 and 4 channels.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_eq_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(4).Test(ukernel);
}
294 
// Same configuration as the 4-channel full-tile case, plus input_offset(7).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_eq_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(4).pooling_tile(4)
      .channels(4).input_offset(7)
      .Test(ukernel);
}
304 
// 4 channels with pooling_elements of 2 and 3 (fewer than the pooling_tile of 4).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_eq_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t window = 2; window <= 3; ++window) {
    ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(4).Test(ukernel);
  }
}
315 
// 4 channels, pooling_elements of 2 and 3, with input_offset(7).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_eq_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t window = 2; window <= 3; ++window) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(window).pooling_tile(4)
        .channels(4).input_offset(7)
        .Test(ukernel);
  }
}
327 
// Channel counts 8, 12, ..., 28 (multiples of 4) with pooling_elements == pooling_tile == 4.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_div_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t nc = 8; nc <= 28; nc += 4) {
    ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(nc).Test(ukernel);
  }
}
338 
// Channel counts 8, 12, ..., 28 with a full pooling tile and input_offset(37).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_div_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t nc = 8; nc <= 28; nc += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(nc).input_offset(37)
        .Test(ukernel);
  }
}
350 
// pooling_elements of 2 and 3 crossed with channel counts 8, 12, ..., 28.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_div_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 8; nc <= 28; nc += 4) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(nc).Test(ukernel);
    }
  }
}
363 
// pooling_elements of 2 and 3 crossed with channel counts 8, 12, ..., 28, using input_offset(37).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_div_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 8; nc <= 28; nc += 4) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(4)
          .channels(nc).input_offset(37)
          .Test(ukernel);
    }
  }
}
377 
// Channel counts 1 through 3 with pooling_elements == pooling_tile == 4.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_lt_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t nc = 1; nc <= 3; ++nc) {
    ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(nc).Test(ukernel);
  }
}
388 
// Channel counts 1 through 3 with a full pooling tile and input_offset(5).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_lt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t nc = 1; nc <= 3; ++nc) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(nc).input_offset(5)
        .Test(ukernel);
  }
}
400 
// pooling_elements of 2 and 3 crossed with channel counts 1 through 3.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_lt_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 1; nc <= 3; ++nc) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(nc).Test(ukernel);
    }
  }
}
413 
// pooling_elements of 2 and 3 crossed with channel counts 1 through 3, using input_offset(5).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_lt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 1; nc <= 3; ++nc) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(4)
          .channels(nc).input_offset(5)
          .Test(ukernel);
    }
  }
}
427 
// Channel counts 5 through 7 with pooling_elements == pooling_tile == 4.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_gt_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t nc = 5; nc <= 7; ++nc) {
    ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(nc).Test(ukernel);
  }
}
438 
// Channel counts 5 through 7 with a full pooling tile and input_offset(11).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_gt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t nc = 5; nc <= 7; ++nc) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(nc).input_offset(11)
        .Test(ukernel);
  }
}
450 
// pooling_elements of 2 and 3 crossed with channel counts 5 through 7.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_gt_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 5; nc <= 7; ++nc) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(nc).Test(ukernel);
    }
  }
}
463 
// pooling_elements of 2 and 3 crossed with channel counts 5 through 7, using input_offset(11).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_gt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 5; nc <= 7; ++nc) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(4)
          .channels(nc).input_offset(11)
          .Test(ukernel);
    }
  }
}
477 
// Sweeps output_pixels 2..5, pooling_elements 2..4 and channels 1, 4, ..., 19.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, few_output_pixels) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t window = 2; window < 5; ++window) {
      for (size_t nc = 1; nc < 21; nc += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels).pooling_elements(window)
            .pooling_tile(4).channels(nc)
            .Test(ukernel);
      }
    }
  }
}
493 
// Sweeps output_pixels 2..5, pooling_elements 2..4 and channels 1, 4, ..., 19 with input_offset(23).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t window = 2; window < 5; ++window) {
      for (size_t nc = 1; nc < 21; nc += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels).pooling_elements(window)
            .pooling_tile(4).channels(nc)
            .input_offset(23)
            .Test(ukernel);
      }
    }
  }
}
510 
// Sweeps output_pixels 2..5, pooling_elements 2..4 and channels 1, 4, ..., 19 with output_stride(23).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t window = 2; window < 5; ++window) {
      for (size_t nc = 1; nc < 21; nc += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels).pooling_elements(window)
            .pooling_tile(4).channels(nc)
            .output_stride(23)
            .Test(ukernel);
      }
    }
  }
}
527 
// Same sweep as the output-stride case, additionally varying step from 2 up to pooling_elements.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, few_output_pixels_with_step) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__sse2_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t window = 2; window < 5; ++window) {
      for (size_t nc = 1; nc < 21; nc += 3) {
        for (size_t step_size = 2; step_size <= window; ++step_size) {
          ArgMaxPoolMicrokernelTester()
              .output_pixels(pixels).pooling_elements(window)
              .pooling_tile(4).step(step_size)
              .channels(nc).output_stride(23)
              .Test(ukernel);
        }
      }
    }
  }
}
547 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
548 
549 
550 #if XNN_ARCH_WASMSIMD
// Runs xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4 with pooling_elements == pooling_tile == 4 and 4 channels.
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_eq_4_unipass_fulltile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(4).Test(ukernel);
}
558 
// Same configuration as the 4-channel full-tile case, plus input_offset(7).
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_eq_4_unipass_fulltile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(4).pooling_tile(4)
      .channels(4).input_offset(7)
      .Test(ukernel);
}
567 
// 4 channels with pooling_elements of 2 and 3 (fewer than the pooling_tile of 4).
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_eq_4_unipass_subtile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t window = 2; window <= 3; ++window) {
    ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(4).Test(ukernel);
  }
}
577 
// 4 channels, pooling_elements of 2 and 3, with input_offset(7).
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_eq_4_unipass_subtile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t window = 2; window <= 3; ++window) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(window).pooling_tile(4)
        .channels(4).input_offset(7)
        .Test(ukernel);
  }
}
588 
// Channel counts 8, 12, ..., 28 (multiples of 4) with pooling_elements == pooling_tile == 4.
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_div_4_unipass_fulltile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t nc = 8; nc <= 28; nc += 4) {
    ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(nc).Test(ukernel);
  }
}
598 
// Channel counts 8, 12, ..., 28 with a full pooling tile and input_offset(37).
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_div_4_unipass_fulltile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t nc = 8; nc <= 28; nc += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(nc).input_offset(37)
        .Test(ukernel);
  }
}
609 
// pooling_elements of 2 and 3 crossed with channel counts 8, 12, ..., 28.
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_div_4_unipass_subtile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 8; nc <= 28; nc += 4) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(nc).Test(ukernel);
    }
  }
}
621 
// pooling_elements of 2 and 3 crossed with channel counts 8, 12, ..., 28, using input_offset(37).
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_div_4_unipass_subtile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 8; nc <= 28; nc += 4) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(4)
          .channels(nc).input_offset(37)
          .Test(ukernel);
    }
  }
}
634 
// Channel counts 1 through 3 with pooling_elements == pooling_tile == 4.
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_lt_4_unipass_fulltile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t nc = 1; nc <= 3; ++nc) {
    ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(nc).Test(ukernel);
  }
}
644 
// Channel counts 1 through 3 with a full pooling tile and input_offset(5).
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_lt_4_unipass_fulltile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t nc = 1; nc <= 3; ++nc) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(nc).input_offset(5)
        .Test(ukernel);
  }
}
655 
// pooling_elements of 2 and 3 crossed with channel counts 1 through 3.
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_lt_4_unipass_subtile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 1; nc <= 3; ++nc) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(nc).Test(ukernel);
    }
  }
}
667 
// pooling_elements of 2 and 3 crossed with channel counts 1 through 3, using input_offset(5).
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_lt_4_unipass_subtile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 1; nc <= 3; ++nc) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(4)
          .channels(nc).input_offset(5)
          .Test(ukernel);
    }
  }
}
680 
// Channel counts 5 through 7 with pooling_elements == pooling_tile == 4.
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_gt_4_unipass_fulltile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t nc = 5; nc <= 7; ++nc) {
    ArgMaxPoolMicrokernelTester().pooling_elements(4).pooling_tile(4).channels(nc).Test(ukernel);
  }
}
690 
// Channel counts 5 through 7 with a full pooling tile and input_offset(11).
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_gt_4_unipass_fulltile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t nc = 5; nc <= 7; ++nc) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(nc).input_offset(11)
        .Test(ukernel);
  }
}
701 
// pooling_elements of 2 and 3 crossed with channel counts 5 through 7.
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_gt_4_unipass_subtile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 5; nc <= 7; ++nc) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(4).channels(nc).Test(ukernel);
    }
  }
}
713 
// pooling_elements of 2 and 3 crossed with channel counts 5 through 7, using input_offset(11).
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, channels_gt_4_unipass_subtile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t window = 2; window <= 3; ++window) {
    for (size_t nc = 5; nc <= 7; ++nc) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(4)
          .channels(nc).input_offset(11)
          .Test(ukernel);
    }
  }
}
726 
// Sweeps output_pixels 2..5, pooling_elements 2..4 and channels 1, 4, ..., 19.
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, few_output_pixels) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t window = 2; window < 5; ++window) {
      for (size_t nc = 1; nc < 21; nc += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels).pooling_elements(window)
            .pooling_tile(4).channels(nc)
            .Test(ukernel);
      }
    }
  }
}
741 
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, few_output_pixels_with_input_offset) {
  // 2 to 5 output pixels, pooling_elements 2 to 4, channel counts 1, 4, ..., 19,
  // input_offset = 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 4; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(4)
            .channels(num_channels)
            .input_offset(23)
            .Test(xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4);
      }
    }
  }
}
757 
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, few_output_pixels_with_output_stride) {
  // 2 to 5 output pixels, pooling_elements 2 to 4, channel counts 1, 4, ..., 19,
  // output_stride = 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 4; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(4)
            .channels(num_channels)
            .output_stride(23)
            .Test(xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4);
      }
    }
  }
}
773 
TEST(F32_ARGMAXPOOL_4X__WASMSIMD_C4, few_output_pixels_with_step) {
  // 2 to 5 output pixels, pooling_elements 2 to 4, channel counts 1, 4, ..., 19,
  // step from 2 up to pooling_elements, output_stride = 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 4; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        for (size_t step_size = 2; step_size <= num_elements; ++step_size) {
          ArgMaxPoolMicrokernelTester()
              .output_pixels(num_pixels)
              .pooling_elements(num_elements).pooling_tile(4)
              .step(step_size)
              .channels(num_channels)
              .output_stride(23)
              .Test(xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4);
        }
      }
    }
  }
}
792 #endif  // XNN_ARCH_WASMSIMD
793 
794 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_eq_1_unipass_fulltile) {
  // pooling_elements == pooling_tile == 4 with exactly 1 channel, Scalar variant.
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(4).pooling_tile(4).channels(1)
      .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
            ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
802 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_eq_1_unipass_fulltile_with_input_offset) {
  // pooling_elements == pooling_tile == 4, 1 channel, input_offset = 3, Scalar variant.
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(4).pooling_tile(4).channels(1)
      .input_offset(3)
      .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
            ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
811 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_eq_1_unipass_subtile) {
  // pooling_elements 2 to 3 (below the pooling tile of 4), 1 channel, Scalar variant.
  for (size_t num_elements = 2; num_elements < 4; ++num_elements) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(num_elements).pooling_tile(4)
        .channels(1)
        .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
821 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_eq_1_unipass_subtile_with_input_offset) {
  // pooling_elements 2 to 3 (below the pooling tile of 4), 1 channel,
  // input_offset = 3, Scalar variant.
  for (size_t num_elements = 2; num_elements < 4; ++num_elements) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(num_elements).pooling_tile(4)
        .channels(1)
        .input_offset(3)
        .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
832 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_gt_1_unipass_fulltile) {
  // pooling_elements == pooling_tile == 4, channel counts 2 to 9, Scalar variant.
  for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
842 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_gt_1_unipass_fulltile_with_input_offset) {
  // pooling_elements == pooling_tile == 4, channel counts 2 to 9,
  // input_offset = 3, Scalar variant.
  for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(4).pooling_tile(4)
        .channels(num_channels)
        .input_offset(3)
        .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
853 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_gt_1_unipass_subtile) {
  // pooling_elements 2 to 3 (below the pooling tile of 4), channel counts 2 to 9,
  // Scalar variant.
  for (size_t num_elements = 2; num_elements < 4; ++num_elements) {
    for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(4)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
                ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
865 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_gt_1_unipass_subtile_with_input_offset) {
  // pooling_elements 2 to 3 (below the pooling tile of 4), channel counts 2 to 9,
  // input_offset = 3, Scalar variant.
  for (size_t num_elements = 2; num_elements < 4; ++num_elements) {
    for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(4)
          .channels(num_channels)
          .input_offset(3)
          .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
                ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
878 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, few_output_pixels) {
  // 2 to 5 output pixels, pooling_elements 2 to 4, channel counts 1 to 5,
  // Scalar variant.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 4; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(4)
            .channels(num_channels)
            .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
                  ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
893 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, few_output_pixels_with_input_offset) {
  // 2 to 5 output pixels, pooling_elements 2 to 4, channel counts 1 to 5,
  // input_offset = 7, Scalar variant.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 4; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(4)
            .channels(num_channels)
            .input_offset(7)
            .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
                  ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
909 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, few_output_pixels_with_output_stride) {
  // 2 to 5 output pixels, pooling_elements 2 to 4, channel counts 1 to 5,
  // output_stride = 7, Scalar variant.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 4; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(4)
            .channels(num_channels)
            .output_stride(7)
            .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
                  ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
925 
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, few_output_pixels_with_step) {
  // 2 to 5 output pixels, pooling_elements 2 to 4, channel counts 1 to 5,
  // step from 2 up to pooling_elements, output_stride = 7, Scalar variant.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 4; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        for (size_t step_size = 2; step_size <= num_elements; ++step_size) {
          ArgMaxPoolMicrokernelTester()
              .output_pixels(num_pixels)
              .pooling_elements(num_elements).pooling_tile(4)
              .step(step_size)
              .channels(num_channels)
              .output_stride(7)
              .Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1,
                    ArgMaxPoolMicrokernelTester::Variant::Scalar);
        }
      }
    }
  }
}
944 
945 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_eq_4_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements == pooling_tile == 9 with exactly 4 channels.
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9).channels(4)
      .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
}
954 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_eq_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements == pooling_tile == 9, 4 channels, input_offset = 7.
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9).channels(4)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
}
964 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_eq_4_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements 2 to 8 (below the pooling tile of 9), 4 channels.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(num_elements).pooling_tile(9)
        .channels(4)
        .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
  }
}
975 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_eq_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements 2 to 8 (below the pooling tile of 9), 4 channels,
  // input_offset = 7.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(num_elements).pooling_tile(9)
        .channels(4)
        .input_offset(7)
        .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
  }
}
987 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_div_4_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements == pooling_tile == 9, channel counts 8, 12, ..., 28.
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
  }
}
998 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_div_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements == pooling_tile == 9, channel counts 8, 12, ..., 28,
  // input_offset = 37.
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
  }
}
1010 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_div_4_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements 2 to 8 (below the pooling tile of 9),
  // channel counts 8, 12, ..., 28.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
    }
  }
}
1023 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_div_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements 2 to 8 (below the pooling tile of 9),
  // channel counts 8, 12, ..., 28, input_offset = 37.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .input_offset(37)
          .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
    }
  }
}
1037 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_lt_4_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements == pooling_tile == 9, channel counts 1 to 3.
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
  }
}
1048 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_lt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements == pooling_tile == 9, channel counts 1 to 3, input_offset = 5.
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
  }
}
1060 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_lt_4_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements 2 to 8 (below the pooling tile of 9), channel counts 1 to 3.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
    }
  }
}
1073 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_lt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements 2 to 8 (below the pooling tile of 9), channel counts 1 to 3,
  // input_offset = 5.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .input_offset(5)
          .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
    }
  }
}
1087 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_gt_4_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements == pooling_tile == 9, channel counts 5 to 7.
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
  }
}
1098 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_gt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements == pooling_tile == 9, channel counts 5 to 7, input_offset = 11.
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
  }
}
1110 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_gt_4_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements 2 to 8 (below the pooling tile of 9), channel counts 5 to 7.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
    }
  }
}
1123 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, channels_gt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // pooling_elements 2 to 8 (below the pooling tile of 9), channel counts 5 to 7,
  // input_offset = 11.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .input_offset(11)
          .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
    }
  }
}
1137 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, few_output_pixels) {
  TEST_REQUIRES_ARM_NEON;
  // 2 to 5 output pixels, pooling_elements 2 to 9, channel counts 1, 4, ..., 19.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 9; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(9)
            .channels(num_channels)
            .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
      }
    }
  }
}
1153 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  // 2 to 5 output pixels, pooling_elements 2 to 9, channel counts 1, 4, ..., 19,
  // input_offset = 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 9; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(9)
            .channels(num_channels)
            .input_offset(23)
            .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
      }
    }
  }
}
1170 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  // 2 to 5 output pixels, pooling_elements 2 to 9, channel counts 1, 4, ..., 19,
  // output_stride = 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 9; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(9)
            .channels(num_channels)
            .output_stride(23)
            .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
      }
    }
  }
}
1187 
TEST(F32_ARGMAXPOOL_9X__NEON_C4, few_output_pixels_with_step) {
  TEST_REQUIRES_ARM_NEON;
  // 2 to 5 output pixels, pooling_elements 2 to 9, channel counts 1, 4, ..., 19,
  // step from 2 up to pooling_elements, output_stride = 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 9; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        for (size_t step_size = 2; step_size <= num_elements; ++step_size) {
          ArgMaxPoolMicrokernelTester()
              .output_pixels(num_pixels)
              .pooling_elements(num_elements).pooling_tile(9)
              .step(step_size)
              .channels(num_channels)
              .output_stride(23)
              .Test(xnn_f32_argmaxpool_ukernel_9x__neon_c4);
        }
      }
    }
  }
}
1207 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1208 
1209 
1210 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_eq_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements == pooling_tile == 9 with exactly 4 channels.
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9).channels(4)
      .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
}
1219 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_eq_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements == pooling_tile == 9, 4 channels, input_offset = 7.
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9).channels(4)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
}
1229 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_eq_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements 2 to 8 (below the pooling tile of 9), 4 channels.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(num_elements).pooling_tile(9)
        .channels(4)
        .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1240 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_eq_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements 2 to 8 (below the pooling tile of 9), 4 channels,
  // input_offset = 7.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(num_elements).pooling_tile(9)
        .channels(4)
        .input_offset(7)
        .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1252 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_div_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements == pooling_tile == 9, channel counts 8, 12, ..., 28.
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1263 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_div_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements == pooling_tile == 9, channel counts 8, 12, ..., 28,
  // input_offset = 37.
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1275 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_div_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements 2 to 8 (below the pooling tile of 9),
  // channel counts 8, 12, ..., 28.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1288 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_div_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements 2 to 8 (below the pooling tile of 9),
  // channel counts 8, 12, ..., 28, input_offset = 37.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .input_offset(37)
          .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1302 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_lt_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements == pooling_tile == 9, channel counts 1 to 3.
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1313 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_lt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements == pooling_tile == 9, channel counts 1 to 3, input_offset = 5.
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1325 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_lt_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements 2 to 8 (below the pooling tile of 9), channel counts 1 to 3.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1338 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_lt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements 2 to 8 (below the pooling tile of 9), channel counts 1 to 3,
  // input_offset = 5.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .input_offset(5)
          .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1352 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_gt_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements == pooling_tile == 9, channel counts 5 to 7.
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1363 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_gt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements == pooling_tile == 9, channel counts 5 to 7, input_offset = 11.
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9)
        .channels(num_channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1375 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_gt_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements 2 to 8 (below the pooling tile of 9), channel counts 5 to 7.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1388 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_gt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // pooling_elements 2 to 8 (below the pooling tile of 9), channel counts 5 to 7,
  // input_offset = 11.
  for (size_t num_elements = 2; num_elements < 9; ++num_elements) {
    for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
          .pooling_elements(num_elements).pooling_tile(9)
          .channels(num_channels)
          .input_offset(11)
          .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1402 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, few_output_pixels) {
  TEST_REQUIRES_X86_SSE2;
  // 2 to 5 output pixels, pooling_elements 2 to 9, channel counts 1, 4, ..., 19.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 9; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(9)
            .channels(num_channels)
            .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
      }
    }
  }
}
1418 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // 2 to 5 output pixels, pooling_elements 2 to 9, channel counts 1, 4, ..., 19,
  // input_offset = 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 9; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(9)
            .channels(num_channels)
            .input_offset(23)
            .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
      }
    }
  }
}
1435 
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  // 2 to 5 output pixels, pooling_elements 2 to 9, channel counts 1, 4, ..., 19,
  // output_stride = 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t num_elements = 2; num_elements <= 9; ++num_elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(num_elements).pooling_tile(9)
            .channels(num_channels)
            .output_stride(23)
            .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
      }
    }
  }
}
1452 
// Sweeps output_pixels 2..5, pooling_elements 2..9, channels 1, 4, ..., 19 and
// step 2..pooling_elements through the 9x SSE2 kernel; output_stride is 23.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, few_output_pixels_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t elements = 2; elements <= 9; ++elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        for (size_t step_size = 2; step_size <= elements; ++step_size) {
          ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels)
            .pooling_elements(elements)
            .pooling_tile(9)
            .step(step_size)
            .channels(num_channels)
            .output_stride(23)
            .Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
        }
      }
    }
  }
}
1472 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1473 
1474 
1475 #if XNN_ARCH_WASMSIMD
// Single run of the 9x WAsm SIMD kernel: 9 pooling elements, pooling_tile(9),
// 4 channels.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_eq_4_unipass_fulltile) {
  const size_t elements = 9;
  const size_t num_channels = 4;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(elements)
    .pooling_tile(9)
    .channels(num_channels)
    .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
}
1483 
// Single run of the 9x WAsm SIMD kernel: 9 pooling elements, pooling_tile(9),
// 4 channels, input_offset(7).
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_eq_4_unipass_fulltile_with_input_offset) {
  const size_t elements = 9;
  const size_t num_channels = 4;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(elements)
    .pooling_tile(9)
    .channels(num_channels)
    .input_offset(7)
    .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
}
1492 
// Runs the 9x WAsm SIMD kernel for pooling_elements 2..8 with pooling_tile(9)
// and 4 channels.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_eq_4_unipass_subtile) {
  for (size_t elements = 2; elements < 9; ++elements) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(elements)
      .pooling_tile(9)
      .channels(4)
      .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
  }
}
1502 
// Runs the 9x WAsm SIMD kernel for pooling_elements 2..8 with pooling_tile(9),
// 4 channels and input_offset(7).
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_eq_4_unipass_subtile_with_input_offset) {
  for (size_t elements = 2; elements < 9; ++elements) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(elements)
      .pooling_tile(9)
      .channels(4)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
  }
}
1513 
// Runs the 9x WAsm SIMD kernel with 9 pooling elements for channels
// 8, 12, ..., 28.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_div_4_unipass_fulltile) {
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
  }
}
1523 
// Runs the 9x WAsm SIMD kernel with 9 pooling elements and input_offset(37)
// for channels 8, 12, ..., 28.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_div_4_unipass_fulltile_with_input_offset) {
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .input_offset(37)
      .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
  }
}
1534 
// Runs the 9x WAsm SIMD kernel for pooling_elements 2..8 crossed with channels
// 8, 12, ..., 28.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_div_4_unipass_subtile) {
  for (size_t elements = 2; elements < 9; ++elements) {
    for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(elements)
        .pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
    }
  }
}
1546 
// Runs the 9x WAsm SIMD kernel for pooling_elements 2..8 crossed with channels
// 8, 12, ..., 28, using input_offset(37).
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_div_4_unipass_subtile_with_input_offset) {
  for (size_t elements = 2; elements < 9; ++elements) {
    for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(elements)
        .pooling_tile(9)
        .channels(num_channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
    }
  }
}
1559 
// Runs the 9x WAsm SIMD kernel with 9 pooling elements for channels 1..3.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_lt_4_unipass_fulltile) {
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
  }
}
1569 
// Runs the 9x WAsm SIMD kernel with 9 pooling elements and input_offset(5)
// for channels 1..3.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_lt_4_unipass_fulltile_with_input_offset) {
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .input_offset(5)
      .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
  }
}
1580 
// Runs the 9x WAsm SIMD kernel for pooling_elements 2..8 crossed with
// channels 1..3.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_lt_4_unipass_subtile) {
  for (size_t elements = 2; elements < 9; ++elements) {
    for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(elements)
        .pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
    }
  }
}
1592 
// Runs the 9x WAsm SIMD kernel for pooling_elements 2..8 crossed with
// channels 1..3, using input_offset(5).
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_lt_4_unipass_subtile_with_input_offset) {
  for (size_t elements = 2; elements < 9; ++elements) {
    for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(elements)
        .pooling_tile(9)
        .channels(num_channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
    }
  }
}
1605 
// Runs the 9x WAsm SIMD kernel with 9 pooling elements for channels 5..7.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_gt_4_unipass_fulltile) {
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
  }
}
1615 
// Runs the 9x WAsm SIMD kernel with 9 pooling elements and input_offset(11)
// for channels 5..7.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_gt_4_unipass_fulltile_with_input_offset) {
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .input_offset(11)
      .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
  }
}
1626 
// Runs the 9x WAsm SIMD kernel for pooling_elements 2..8 crossed with
// channels 5..7.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_gt_4_unipass_subtile) {
  for (size_t elements = 2; elements < 9; ++elements) {
    for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(elements)
        .pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
    }
  }
}
1638 
// Runs the 9x WAsm SIMD kernel for pooling_elements 2..8 crossed with
// channels 5..7, using input_offset(11).
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, channels_gt_4_unipass_subtile_with_input_offset) {
  for (size_t elements = 2; elements < 9; ++elements) {
    for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(elements)
        .pooling_tile(9)
        .channels(num_channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
    }
  }
}
1651 
// Sweeps output_pixels 2..5, pooling_elements 2..9 and channels 1, 4, ..., 19
// through the 9x WAsm SIMD kernel with pooling_tile(9).
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, few_output_pixels) {
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t elements = 2; elements <= 9; ++elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements)
          .pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
      }
    }
  }
}
1666 
// Sweeps output_pixels 2..5, pooling_elements 2..9 and channels 1, 4, ..., 19
// through the 9x WAsm SIMD kernel with input_offset(23).
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, few_output_pixels_with_input_offset) {
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t elements = 2; elements <= 9; ++elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements)
          .pooling_tile(9)
          .channels(num_channels)
          .input_offset(23)
          .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
      }
    }
  }
}
1682 
// Sweeps output_pixels 2..5, pooling_elements 2..9 and channels 1, 4, ..., 19
// through the 9x WAsm SIMD kernel with output_stride(23).
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, few_output_pixels_with_output_stride) {
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t elements = 2; elements <= 9; ++elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements)
          .pooling_tile(9)
          .channels(num_channels)
          .output_stride(23)
          .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
      }
    }
  }
}
1698 
// Sweeps output_pixels 2..5, pooling_elements 2..9, channels 1, 4, ..., 19 and
// step 2..pooling_elements through the 9x WAsm SIMD kernel; output_stride is 23.
TEST(F32_ARGMAXPOOL_9X__WASMSIMD_C4, few_output_pixels_with_step) {
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t elements = 2; elements <= 9; ++elements) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        for (size_t step_size = 2; step_size <= elements; ++step_size) {
          ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels)
            .pooling_elements(elements)
            .pooling_tile(9)
            .step(step_size)
            .channels(num_channels)
            .output_stride(23)
            .Test(xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4);
        }
      }
    }
  }
}
1717 #endif  // XNN_ARCH_WASMSIMD
1718 
1719 
// Single run of the 9x scalar kernel (Variant::Scalar): 9 pooling elements,
// pooling_tile(9), 1 channel.
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_eq_1_unipass_fulltile) {
  const size_t elements = 9;
  const size_t num_channels = 1;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(elements)
    .pooling_tile(9)
    .channels(num_channels)
    .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
1727 
// Single run of the 9x scalar kernel (Variant::Scalar): 9 pooling elements,
// pooling_tile(9), 1 channel, input_offset(3).
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_eq_1_unipass_fulltile_with_input_offset) {
  const size_t elements = 9;
  const size_t num_channels = 1;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(elements)
    .pooling_tile(9)
    .channels(num_channels)
    .input_offset(3)
    .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
1736 
// Runs the 9x scalar kernel (Variant::Scalar) for pooling_elements 2..8 with
// pooling_tile(9) and 1 channel.
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_eq_1_unipass_subtile) {
  for (size_t elements = 2; elements < 9; ++elements) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(elements)
      .pooling_tile(9)
      .channels(1)
      .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
1746 
// Runs the 9x scalar kernel (Variant::Scalar) for pooling_elements 2..8 with
// pooling_tile(9), 1 channel and input_offset(3).
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_eq_1_unipass_subtile_with_input_offset) {
  for (size_t elements = 2; elements < 9; ++elements) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(elements)
      .pooling_tile(9)
      .channels(1)
      .input_offset(3)
      .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
1757 
// Runs the 9x scalar kernel (Variant::Scalar) with 9 pooling elements for
// channels 2..9.
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_gt_1_unipass_fulltile) {
  for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
1767 
// Runs the 9x scalar kernel (Variant::Scalar) with 9 pooling elements and
// input_offset(3) for channels 2..9.
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_gt_1_unipass_fulltile_with_input_offset) {
  for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .input_offset(3)
      .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
1778 
// Runs the 9x scalar kernel (Variant::Scalar) for pooling_elements 2..8
// crossed with channels 2..9.
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_gt_1_unipass_subtile) {
  for (size_t elements = 2; elements < 9; ++elements) {
    for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(elements)
        .pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
1790 
// Runs the 9x scalar kernel (Variant::Scalar) for pooling_elements 2..8
// crossed with channels 2..9, using input_offset(3).
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_gt_1_unipass_subtile_with_input_offset) {
  for (size_t elements = 2; elements < 9; ++elements) {
    for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(elements)
        .pooling_tile(9)
        .channels(num_channels)
        .input_offset(3)
        .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
1803 
// Sweeps output_pixels 2..5, pooling_elements 2..9 and channels 1..5 through
// the 9x scalar kernel (Variant::Scalar) with pooling_tile(9).
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, few_output_pixels) {
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t elements = 2; elements <= 9; ++elements) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements)
          .pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1818 
// Sweeps output_pixels 2..5, pooling_elements 2..9 and channels 1..5 through
// the 9x scalar kernel (Variant::Scalar) with input_offset(7).
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, few_output_pixels_with_input_offset) {
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t elements = 2; elements <= 9; ++elements) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements)
          .pooling_tile(9)
          .channels(num_channels)
          .input_offset(7)
          .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1834 
// Sweeps output_pixels 2..5, pooling_elements 2..9 and channels 1..5 through
// the 9x scalar kernel (Variant::Scalar) with output_stride(7).
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, few_output_pixels_with_output_stride) {
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t elements = 2; elements <= 9; ++elements) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements)
          .pooling_tile(9)
          .channels(num_channels)
          .output_stride(7)
          .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1850 
// Sweeps output_pixels 2..5, pooling_elements 2..9, channels 1..5 and
// step 2..pooling_elements through the 9x scalar kernel (Variant::Scalar);
// output_stride is 7.
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, few_output_pixels_with_step) {
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t elements = 2; elements <= 9; ++elements) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        for (size_t step_size = 2; step_size <= elements; ++step_size) {
          ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels)
            .pooling_elements(elements)
            .pooling_tile(9)
            .step(step_size)
            .channels(num_channels)
            .output_stride(7)
            .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
        }
      }
    }
  }
}
1869 
1870 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
1871 
// Single run of the 9p8x NEON kernel: 17 pooling elements, pooling_tile(9, 8),
// 4 channels.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_eq_4_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  const size_t elements = 17;
  const size_t num_channels = 4;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(elements)
    .pooling_tile(9, 8)
    .channels(num_channels)
    .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
}
1880 
// Single run of the 9p8x NEON kernel: 17 pooling elements, pooling_tile(9, 8),
// 4 channels, input_offset(7).
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_eq_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  const size_t elements = 17;
  const size_t num_channels = 4;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(elements)
    .pooling_tile(9, 8)
    .channels(num_channels)
    .input_offset(7)
    .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
}
1890 
// Runs the 9p8x NEON kernel for pooling_elements 10..16 with
// pooling_tile(9, 8) and 4 channels.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_eq_4_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t elements = 10; elements < 17; ++elements) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(elements)
      .pooling_tile(9, 8)
      .channels(4)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
  }
}
1901 
// Runs the 9p8x NEON kernel for pooling_elements 10..16 with
// pooling_tile(9, 8), 4 channels and input_offset(7).
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_eq_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t elements = 10; elements < 17; ++elements) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(elements)
      .pooling_tile(9, 8)
      .channels(4)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
  }
}
1913 
// Runs the 9p8x NEON kernel with 17 pooling elements and pooling_tile(9, 8)
// for channels 8, 12, ..., 28.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_div_4_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
  }
}
1924 
// Runs the 9p8x NEON kernel with 17 pooling elements, pooling_tile(9, 8) and
// input_offset(23) for channels 8, 12, ..., 28.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_div_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .input_offset(23)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
  }
}
1936 
// Two-pass sub-tile sweep for the 9p8x NEON kernel: pooling_elements 10..16
// with pooling_tile(9, 8), crossed with channels 8, 12, ..., 28.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and repeat a single configuration.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_div_4_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
1949 
// Two-pass sub-tile sweep for the 9p8x NEON kernel with input_offset(37):
// pooling_elements 10..16 with pooling_tile(9, 8), crossed with channels
// 8, 12, ..., 28.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and repeat a single configuration.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_div_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
1963 
// Runs the 9p8x NEON kernel with 17 pooling elements and pooling_tile(9, 8)
// for channels 1..3.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_lt_4_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
  }
}
1974 
// Runs the 9p8x NEON kernel with 17 pooling elements, pooling_tile(9, 8) and
// input_offset(5) for channels 1..3.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_lt_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .input_offset(5)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
  }
}
1986 
// Two-pass sub-tile sweep for the 9p8x NEON kernel: pooling_elements 10..16
// with pooling_tile(9, 8), crossed with channels 1..3.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and repeat a single configuration.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_lt_4_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
1999 
// Two-pass sub-tile sweep for the 9p8x NEON kernel with input_offset(5):
// pooling_elements 10..16 with pooling_tile(9, 8), crossed with channels 1..3.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and repeat a single configuration.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_lt_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
2013 
// Runs the 9p8x NEON kernel with 17 pooling elements and pooling_tile(9, 8)
// for channels 5..7.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_gt_4_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
  }
}
2024 
// Runs the 9p8x NEON kernel with 17 pooling elements, pooling_tile(9, 8) and
// input_offset(11) for channels 5..7.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_gt_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .input_offset(11)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
  }
}
2036 
// Two-pass sub-tile sweep for the 9p8x NEON kernel: pooling_elements 10..16
// with pooling_tile(9, 8), crossed with channels 5..7.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and repeat a single configuration.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_gt_4_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
2049 
// Two-pass sub-tile sweep for the 9p8x NEON kernel with input_offset(11):
// pooling_elements 10..16 with pooling_tile(9, 8), crossed with channels 5..7.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and repeat a single configuration.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_gt_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
2063 
// Multipass sweep for the 9p8x NEON kernel: pooling_elements 18, 21, ..., 33
// with pooling_tile(9, 8) and 4 channels.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the loop unused and never reach a size above 9 + 8.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_eq_4_multipass) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(4)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
  }
}
2074 
// Multipass sweep for the 9p8x NEON kernel with input_offset(7):
// pooling_elements 18, 21, ..., 33 with pooling_tile(9, 8) and 4 channels.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the loop unused and never reach a size above 9 + 8.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_eq_4_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(4)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
  }
}
2086 
// Multipass sweep for the 9p8x NEON kernel: pooling_elements 18, 21, ..., 33
// with pooling_tile(9, 8), crossed with channels 8, 12, ..., 28.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and never reach a size above 9 + 8.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_div_4_multipass) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
2099 
// Multipass sweep for the 9p8x NEON kernel with input_offset(37):
// pooling_elements 18, 21, ..., 33 with pooling_tile(9, 8), crossed with
// channels 8, 12, ..., 28.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and never reach a size above 9 + 8.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_div_4_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
2113 
// Multipass sweep for the 9p8x NEON kernel: pooling_elements 18, 21, ..., 33
// with pooling_tile(9, 8), crossed with channels 1..3.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and never reach a size above 9 + 8.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_lt_4_multipass) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
2126 
// Multipass sweep for the 9p8x NEON kernel with input_offset(4):
// pooling_elements 18, 21, ..., 33 with pooling_tile(9, 8), crossed with
// channels 1..3.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and never reach a size above 9 + 8.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
// NOTE(review): the other channels_lt_4 tests in this suite use
// input_offset(5); the 4 here is kept unchanged - confirm it is intended.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_lt_4_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(4)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
2140 
// Multipass sweep for the 9p8x NEON kernel: pooling_elements 18, 21, ..., 33
// with pooling_tile(9, 8), crossed with channels 5..7.
// pooling_elements() must receive the loop variable; a constant 17 here would
// leave the outer loop unused and never reach a size above 9 + 8.
// NOTE(review): the file header marks this file as generated by
// tools/generate-argmaxpool-test.py - apply the same fix to the generator.
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_gt_4_multipass) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
    }
  }
}
2153 
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4,channels_gt_4_multipass_with_input_offset)2154   TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, channels_gt_4_multipass_with_input_offset) {
2155     TEST_REQUIRES_ARM_NEON;
2156     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2157       for (size_t channels = 5; channels < 8; channels++) {
2158         ArgMaxPoolMicrokernelTester()
2159           .pooling_elements(17)
2160           .pooling_tile(9, 8)
2161           .channels(channels)
2162           .input_offset(11)
2163           .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
2164       }
2165     }
2166   }
2167 
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4,few_output_pixels)2168   TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, few_output_pixels) {
2169     TEST_REQUIRES_ARM_NEON;
2170     for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
2171       for (size_t pooling_elements = 10; pooling_elements <= 17; pooling_elements++) {
2172         for (size_t channels = 1; channels <= 20; channels += 3) {
2173           ArgMaxPoolMicrokernelTester()
2174             .output_pixels(output_pixels)
2175             .pooling_elements(pooling_elements)
2176             .pooling_tile(9, 8)
2177             .channels(channels)
2178             .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
2179         }
2180       }
2181     }
2182   }
2183 
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4,few_output_pixels_with_input_offset)2184   TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, few_output_pixels_with_input_offset) {
2185     TEST_REQUIRES_ARM_NEON;
2186     for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
2187       for (size_t pooling_elements = 10; pooling_elements <= 17; pooling_elements++) {
2188         for (size_t channels = 1; channels <= 20; channels += 3) {
2189           ArgMaxPoolMicrokernelTester()
2190             .output_pixels(output_pixels)
2191             .pooling_elements(pooling_elements)
2192             .pooling_tile(9, 8)
2193             .channels(channels)
2194             .input_offset(23)
2195             .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
2196         }
2197       }
2198     }
2199   }
2200 
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4,few_output_pixels_with_output_stride)2201   TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, few_output_pixels_with_output_stride) {
2202     TEST_REQUIRES_ARM_NEON;
2203     for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
2204       for (size_t pooling_elements = 10; pooling_elements <= 17; pooling_elements++) {
2205         for (size_t channels = 1; channels <= 20; channels += 3) {
2206           ArgMaxPoolMicrokernelTester()
2207             .output_pixels(output_pixels)
2208             .pooling_elements(pooling_elements)
2209             .pooling_tile(9, 8)
2210             .channels(channels)
2211             .output_stride(23)
2212             .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
2213         }
2214       }
2215     }
2216   }
2217 
TEST(F32_ARGMAXPOOL_9P8X__NEON_C4,few_output_pixels_with_step)2218   TEST(F32_ARGMAXPOOL_9P8X__NEON_C4, few_output_pixels_with_step) {
2219     TEST_REQUIRES_ARM_NEON;
2220     for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
2221       for (size_t pooling_elements = 10; pooling_elements <= 17; pooling_elements++) {
2222         for (size_t channels = 1; channels <= 20; channels += 3) {
2223           for (size_t step = 2; step <= pooling_elements; step++) {
2224             ArgMaxPoolMicrokernelTester()
2225               .output_pixels(output_pixels)
2226               .pooling_elements(pooling_elements)
2227               .pooling_tile(9, 8)
2228               .step(step)
2229               .channels(channels)
2230               .output_stride(23)
2231               .Test(xnn_f32_argmaxpool_ukernel_9p8x__neon_c4);
2232           }
2233         }
2234       }
2235     }
2236   }
2237 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2238 
2239 
2240 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
2241 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_eq_4_twopass_fulltile)2242   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_twopass_fulltile) {
2243     TEST_REQUIRES_X86_SSE2;
2244     ArgMaxPoolMicrokernelTester()
2245       .pooling_elements(17)
2246       .pooling_tile(9, 8)
2247       .channels(4)
2248       .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2249   }
2250 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_eq_4_twopass_fulltile_with_input_offset)2251   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_twopass_fulltile_with_input_offset) {
2252     TEST_REQUIRES_X86_SSE2;
2253     ArgMaxPoolMicrokernelTester()
2254       .pooling_elements(17)
2255       .pooling_tile(9, 8)
2256       .channels(4)
2257       .input_offset(7)
2258       .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2259   }
2260 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_eq_4_twopass_subtile)2261   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_twopass_subtile) {
2262     TEST_REQUIRES_X86_SSE2;
2263     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2264       ArgMaxPoolMicrokernelTester()
2265         .pooling_elements(pooling_elements)
2266         .pooling_tile(9, 8)
2267         .channels(4)
2268         .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2269     }
2270   }
2271 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_eq_4_twopass_subtile_with_input_offset)2272   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_twopass_subtile_with_input_offset) {
2273     TEST_REQUIRES_X86_SSE2;
2274     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2275       ArgMaxPoolMicrokernelTester()
2276         .pooling_elements(pooling_elements)
2277         .pooling_tile(9, 8)
2278         .channels(4)
2279         .input_offset(7)
2280         .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2281     }
2282   }
2283 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_div_4_twopass_fulltile)2284   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_twopass_fulltile) {
2285     TEST_REQUIRES_X86_SSE2;
2286     for (size_t channels = 8; channels < 32; channels += 4) {
2287       ArgMaxPoolMicrokernelTester()
2288         .pooling_elements(17)
2289         .pooling_tile(9, 8)
2290         .channels(channels)
2291         .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2292     }
2293   }
2294 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_div_4_twopass_fulltile_with_input_offset)2295   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_twopass_fulltile_with_input_offset) {
2296     TEST_REQUIRES_X86_SSE2;
2297     for (size_t channels = 8; channels < 32; channels += 4) {
2298       ArgMaxPoolMicrokernelTester()
2299         .pooling_elements(17)
2300         .pooling_tile(9, 8)
2301         .channels(channels)
2302         .input_offset(23)
2303         .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2304     }
2305   }
2306 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_div_4_twopass_subtile)2307   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_twopass_subtile) {
2308     TEST_REQUIRES_X86_SSE2;
2309     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2310       for (size_t channels = 8; channels < 32; channels += 4) {
2311         ArgMaxPoolMicrokernelTester()
2312           .pooling_elements(17)
2313           .pooling_tile(9, 8)
2314           .channels(channels)
2315           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2316       }
2317     }
2318   }
2319 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_div_4_twopass_subtile_with_input_offset)2320   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_twopass_subtile_with_input_offset) {
2321     TEST_REQUIRES_X86_SSE2;
2322     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2323       for (size_t channels = 8; channels < 32; channels += 4) {
2324         ArgMaxPoolMicrokernelTester()
2325           .pooling_elements(17)
2326           .pooling_tile(9, 8)
2327           .channels(channels)
2328           .input_offset(37)
2329           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2330       }
2331     }
2332   }
2333 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_lt_4_twopass_fulltile)2334   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_twopass_fulltile) {
2335     TEST_REQUIRES_X86_SSE2;
2336     for (size_t channels = 1; channels < 4; channels++) {
2337       ArgMaxPoolMicrokernelTester()
2338         .pooling_elements(17)
2339         .pooling_tile(9, 8)
2340         .channels(channels)
2341         .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2342     }
2343   }
2344 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_lt_4_twopass_fulltile_with_input_offset)2345   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_twopass_fulltile_with_input_offset) {
2346     TEST_REQUIRES_X86_SSE2;
2347     for (size_t channels = 1; channels < 4; channels++) {
2348       ArgMaxPoolMicrokernelTester()
2349         .pooling_elements(17)
2350         .pooling_tile(9, 8)
2351         .channels(channels)
2352         .input_offset(5)
2353         .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2354     }
2355   }
2356 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_lt_4_twopass_subtile)2357   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_twopass_subtile) {
2358     TEST_REQUIRES_X86_SSE2;
2359     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2360       for (size_t channels = 1; channels < 4; channels++) {
2361         ArgMaxPoolMicrokernelTester()
2362           .pooling_elements(17)
2363           .pooling_tile(9, 8)
2364           .channels(channels)
2365           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2366       }
2367     }
2368   }
2369 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_lt_4_twopass_subtile_with_input_offset)2370   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_twopass_subtile_with_input_offset) {
2371     TEST_REQUIRES_X86_SSE2;
2372     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2373       for (size_t channels = 1; channels < 4; channels++) {
2374         ArgMaxPoolMicrokernelTester()
2375           .pooling_elements(17)
2376           .pooling_tile(9, 8)
2377           .channels(channels)
2378           .input_offset(5)
2379           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2380       }
2381     }
2382   }
2383 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_gt_4_twopass_fulltile)2384   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_twopass_fulltile) {
2385     TEST_REQUIRES_X86_SSE2;
2386     for (size_t channels = 5; channels < 8; channels++) {
2387       ArgMaxPoolMicrokernelTester()
2388         .pooling_elements(17)
2389         .pooling_tile(9, 8)
2390         .channels(channels)
2391         .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2392     }
2393   }
2394 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_gt_4_twopass_fulltile_with_input_offset)2395   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_twopass_fulltile_with_input_offset) {
2396     TEST_REQUIRES_X86_SSE2;
2397     for (size_t channels = 5; channels < 8; channels++) {
2398       ArgMaxPoolMicrokernelTester()
2399         .pooling_elements(17)
2400         .pooling_tile(9, 8)
2401         .channels(channels)
2402         .input_offset(11)
2403         .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2404     }
2405   }
2406 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_gt_4_twopass_subtile)2407   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_twopass_subtile) {
2408     TEST_REQUIRES_X86_SSE2;
2409     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2410       for (size_t channels = 5; channels < 8; channels++) {
2411         ArgMaxPoolMicrokernelTester()
2412           .pooling_elements(17)
2413           .pooling_tile(9, 8)
2414           .channels(channels)
2415           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2416       }
2417     }
2418   }
2419 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_gt_4_twopass_subtile_with_input_offset)2420   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_twopass_subtile_with_input_offset) {
2421     TEST_REQUIRES_X86_SSE2;
2422     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2423       for (size_t channels = 5; channels < 8; channels++) {
2424         ArgMaxPoolMicrokernelTester()
2425           .pooling_elements(17)
2426           .pooling_tile(9, 8)
2427           .channels(channels)
2428           .input_offset(11)
2429           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2430       }
2431     }
2432   }
2433 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_eq_4_multipass)2434   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_multipass) {
2435     TEST_REQUIRES_X86_SSE2;
2436     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2437       ArgMaxPoolMicrokernelTester()
2438         .pooling_elements(17)
2439         .pooling_tile(9, 8)
2440         .channels(4)
2441         .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2442     }
2443   }
2444 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_eq_4_multipass_with_input_offset)2445   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_multipass_with_input_offset) {
2446     TEST_REQUIRES_X86_SSE2;
2447     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2448       ArgMaxPoolMicrokernelTester()
2449         .pooling_elements(17)
2450         .pooling_tile(9, 8)
2451         .channels(4)
2452         .input_offset(7)
2453         .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2454     }
2455   }
2456 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_div_4_multipass)2457   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_multipass) {
2458     TEST_REQUIRES_X86_SSE2;
2459     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2460       for (size_t channels = 8; channels < 32; channels += 4) {
2461         ArgMaxPoolMicrokernelTester()
2462           .pooling_elements(17)
2463           .pooling_tile(9, 8)
2464           .channels(channels)
2465           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2466       }
2467     }
2468   }
2469 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_div_4_multipass_with_input_offset)2470   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_multipass_with_input_offset) {
2471     TEST_REQUIRES_X86_SSE2;
2472     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2473       for (size_t channels = 8; channels < 32; channels += 4) {
2474         ArgMaxPoolMicrokernelTester()
2475           .pooling_elements(17)
2476           .pooling_tile(9, 8)
2477           .channels(channels)
2478           .input_offset(37)
2479           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2480       }
2481     }
2482   }
2483 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_lt_4_multipass)2484   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_multipass) {
2485     TEST_REQUIRES_X86_SSE2;
2486     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2487       for (size_t channels = 1; channels < 4; channels++) {
2488         ArgMaxPoolMicrokernelTester()
2489           .pooling_elements(17)
2490           .pooling_tile(9, 8)
2491           .channels(channels)
2492           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2493       }
2494     }
2495   }
2496 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_lt_4_multipass_with_input_offset)2497   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_multipass_with_input_offset) {
2498     TEST_REQUIRES_X86_SSE2;
2499     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2500       for (size_t channels = 1; channels < 4; channels++) {
2501         ArgMaxPoolMicrokernelTester()
2502           .pooling_elements(17)
2503           .pooling_tile(9, 8)
2504           .channels(channels)
2505           .input_offset(4)
2506           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2507       }
2508     }
2509   }
2510 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_gt_4_multipass)2511   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_multipass) {
2512     TEST_REQUIRES_X86_SSE2;
2513     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2514       for (size_t channels = 5; channels < 8; channels++) {
2515         ArgMaxPoolMicrokernelTester()
2516           .pooling_elements(17)
2517           .pooling_tile(9, 8)
2518           .channels(channels)
2519           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2520       }
2521     }
2522   }
2523 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,channels_gt_4_multipass_with_input_offset)2524   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_multipass_with_input_offset) {
2525     TEST_REQUIRES_X86_SSE2;
2526     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2527       for (size_t channels = 5; channels < 8; channels++) {
2528         ArgMaxPoolMicrokernelTester()
2529           .pooling_elements(17)
2530           .pooling_tile(9, 8)
2531           .channels(channels)
2532           .input_offset(11)
2533           .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2534       }
2535     }
2536   }
2537 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,few_output_pixels)2538   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, few_output_pixels) {
2539     TEST_REQUIRES_X86_SSE2;
2540     for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
2541       for (size_t pooling_elements = 10; pooling_elements <= 17; pooling_elements++) {
2542         for (size_t channels = 1; channels <= 20; channels += 3) {
2543           ArgMaxPoolMicrokernelTester()
2544             .output_pixels(output_pixels)
2545             .pooling_elements(pooling_elements)
2546             .pooling_tile(9, 8)
2547             .channels(channels)
2548             .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2549         }
2550       }
2551     }
2552   }
2553 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,few_output_pixels_with_input_offset)2554   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, few_output_pixels_with_input_offset) {
2555     TEST_REQUIRES_X86_SSE2;
2556     for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
2557       for (size_t pooling_elements = 10; pooling_elements <= 17; pooling_elements++) {
2558         for (size_t channels = 1; channels <= 20; channels += 3) {
2559           ArgMaxPoolMicrokernelTester()
2560             .output_pixels(output_pixels)
2561             .pooling_elements(pooling_elements)
2562             .pooling_tile(9, 8)
2563             .channels(channels)
2564             .input_offset(23)
2565             .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2566         }
2567       }
2568     }
2569   }
2570 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,few_output_pixels_with_output_stride)2571   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, few_output_pixels_with_output_stride) {
2572     TEST_REQUIRES_X86_SSE2;
2573     for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
2574       for (size_t pooling_elements = 10; pooling_elements <= 17; pooling_elements++) {
2575         for (size_t channels = 1; channels <= 20; channels += 3) {
2576           ArgMaxPoolMicrokernelTester()
2577             .output_pixels(output_pixels)
2578             .pooling_elements(pooling_elements)
2579             .pooling_tile(9, 8)
2580             .channels(channels)
2581             .output_stride(23)
2582             .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2583         }
2584       }
2585     }
2586   }
2587 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4,few_output_pixels_with_step)2588   TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, few_output_pixels_with_step) {
2589     TEST_REQUIRES_X86_SSE2;
2590     for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
2591       for (size_t pooling_elements = 10; pooling_elements <= 17; pooling_elements++) {
2592         for (size_t channels = 1; channels <= 20; channels += 3) {
2593           for (size_t step = 2; step <= pooling_elements; step++) {
2594             ArgMaxPoolMicrokernelTester()
2595               .output_pixels(output_pixels)
2596               .pooling_elements(pooling_elements)
2597               .pooling_tile(9, 8)
2598               .step(step)
2599               .channels(channels)
2600               .output_stride(23)
2601               .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
2602           }
2603         }
2604       }
2605     }
2606   }
2607 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2608 
2609 
2610 #if XNN_ARCH_WASMSIMD
2611 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_eq_4_twopass_fulltile)2612   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_eq_4_twopass_fulltile) {
2613     ArgMaxPoolMicrokernelTester()
2614       .pooling_elements(17)
2615       .pooling_tile(9, 8)
2616       .channels(4)
2617       .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2618   }
2619 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_eq_4_twopass_fulltile_with_input_offset)2620   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_eq_4_twopass_fulltile_with_input_offset) {
2621     ArgMaxPoolMicrokernelTester()
2622       .pooling_elements(17)
2623       .pooling_tile(9, 8)
2624       .channels(4)
2625       .input_offset(7)
2626       .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2627   }
2628 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_eq_4_twopass_subtile)2629   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_eq_4_twopass_subtile) {
2630     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2631       ArgMaxPoolMicrokernelTester()
2632         .pooling_elements(pooling_elements)
2633         .pooling_tile(9, 8)
2634         .channels(4)
2635         .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2636     }
2637   }
2638 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_eq_4_twopass_subtile_with_input_offset)2639   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_eq_4_twopass_subtile_with_input_offset) {
2640     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2641       ArgMaxPoolMicrokernelTester()
2642         .pooling_elements(pooling_elements)
2643         .pooling_tile(9, 8)
2644         .channels(4)
2645         .input_offset(7)
2646         .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2647     }
2648   }
2649 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_div_4_twopass_fulltile)2650   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_div_4_twopass_fulltile) {
2651     for (size_t channels = 8; channels < 32; channels += 4) {
2652       ArgMaxPoolMicrokernelTester()
2653         .pooling_elements(17)
2654         .pooling_tile(9, 8)
2655         .channels(channels)
2656         .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2657     }
2658   }
2659 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_div_4_twopass_fulltile_with_input_offset)2660   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_div_4_twopass_fulltile_with_input_offset) {
2661     for (size_t channels = 8; channels < 32; channels += 4) {
2662       ArgMaxPoolMicrokernelTester()
2663         .pooling_elements(17)
2664         .pooling_tile(9, 8)
2665         .channels(channels)
2666         .input_offset(23)
2667         .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2668     }
2669   }
2670 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_div_4_twopass_subtile)2671   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_div_4_twopass_subtile) {
2672     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2673       for (size_t channels = 8; channels < 32; channels += 4) {
2674         ArgMaxPoolMicrokernelTester()
2675           .pooling_elements(17)
2676           .pooling_tile(9, 8)
2677           .channels(channels)
2678           .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2679       }
2680     }
2681   }
2682 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_div_4_twopass_subtile_with_input_offset)2683   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_div_4_twopass_subtile_with_input_offset) {
2684     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2685       for (size_t channels = 8; channels < 32; channels += 4) {
2686         ArgMaxPoolMicrokernelTester()
2687           .pooling_elements(17)
2688           .pooling_tile(9, 8)
2689           .channels(channels)
2690           .input_offset(37)
2691           .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2692       }
2693     }
2694   }
2695 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_lt_4_twopass_fulltile)2696   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_lt_4_twopass_fulltile) {
2697     for (size_t channels = 1; channels < 4; channels++) {
2698       ArgMaxPoolMicrokernelTester()
2699         .pooling_elements(17)
2700         .pooling_tile(9, 8)
2701         .channels(channels)
2702         .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2703     }
2704   }
2705 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_lt_4_twopass_fulltile_with_input_offset)2706   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_lt_4_twopass_fulltile_with_input_offset) {
2707     for (size_t channels = 1; channels < 4; channels++) {
2708       ArgMaxPoolMicrokernelTester()
2709         .pooling_elements(17)
2710         .pooling_tile(9, 8)
2711         .channels(channels)
2712         .input_offset(5)
2713         .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2714     }
2715   }
2716 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_lt_4_twopass_subtile)2717   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_lt_4_twopass_subtile) {
2718     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2719       for (size_t channels = 1; channels < 4; channels++) {
2720         ArgMaxPoolMicrokernelTester()
2721           .pooling_elements(17)
2722           .pooling_tile(9, 8)
2723           .channels(channels)
2724           .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2725       }
2726     }
2727   }
2728 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_lt_4_twopass_subtile_with_input_offset)2729   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_lt_4_twopass_subtile_with_input_offset) {
2730     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2731       for (size_t channels = 1; channels < 4; channels++) {
2732         ArgMaxPoolMicrokernelTester()
2733           .pooling_elements(17)
2734           .pooling_tile(9, 8)
2735           .channels(channels)
2736           .input_offset(5)
2737           .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2738       }
2739     }
2740   }
2741 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_gt_4_twopass_fulltile)2742   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_gt_4_twopass_fulltile) {
2743     for (size_t channels = 5; channels < 8; channels++) {
2744       ArgMaxPoolMicrokernelTester()
2745         .pooling_elements(17)
2746         .pooling_tile(9, 8)
2747         .channels(channels)
2748         .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2749     }
2750   }
2751 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_gt_4_twopass_fulltile_with_input_offset)2752   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_gt_4_twopass_fulltile_with_input_offset) {
2753     for (size_t channels = 5; channels < 8; channels++) {
2754       ArgMaxPoolMicrokernelTester()
2755         .pooling_elements(17)
2756         .pooling_tile(9, 8)
2757         .channels(channels)
2758         .input_offset(11)
2759         .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2760     }
2761   }
2762 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_gt_4_twopass_subtile)2763   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_gt_4_twopass_subtile) {
2764     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2765       for (size_t channels = 5; channels < 8; channels++) {
2766         ArgMaxPoolMicrokernelTester()
2767           .pooling_elements(17)
2768           .pooling_tile(9, 8)
2769           .channels(channels)
2770           .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2771       }
2772     }
2773   }
2774 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_gt_4_twopass_subtile_with_input_offset)2775   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_gt_4_twopass_subtile_with_input_offset) {
2776     for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
2777       for (size_t channels = 5; channels < 8; channels++) {
2778         ArgMaxPoolMicrokernelTester()
2779           .pooling_elements(17)
2780           .pooling_tile(9, 8)
2781           .channels(channels)
2782           .input_offset(11)
2783           .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2784       }
2785     }
2786   }
2787 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_eq_4_multipass)2788   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_eq_4_multipass) {
2789     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2790       ArgMaxPoolMicrokernelTester()
2791         .pooling_elements(17)
2792         .pooling_tile(9, 8)
2793         .channels(4)
2794         .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2795     }
2796   }
2797 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_eq_4_multipass_with_input_offset)2798   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_eq_4_multipass_with_input_offset) {
2799     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2800       ArgMaxPoolMicrokernelTester()
2801         .pooling_elements(17)
2802         .pooling_tile(9, 8)
2803         .channels(4)
2804         .input_offset(7)
2805         .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2806     }
2807   }
2808 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_div_4_multipass)2809   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_div_4_multipass) {
2810     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2811       for (size_t channels = 8; channels < 32; channels += 4) {
2812         ArgMaxPoolMicrokernelTester()
2813           .pooling_elements(17)
2814           .pooling_tile(9, 8)
2815           .channels(channels)
2816           .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2817       }
2818     }
2819   }
2820 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_div_4_multipass_with_input_offset)2821   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_div_4_multipass_with_input_offset) {
2822     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2823       for (size_t channels = 8; channels < 32; channels += 4) {
2824         ArgMaxPoolMicrokernelTester()
2825           .pooling_elements(17)
2826           .pooling_tile(9, 8)
2827           .channels(channels)
2828           .input_offset(37)
2829           .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2830       }
2831     }
2832   }
2833 
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4,channels_lt_4_multipass)2834   TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_lt_4_multipass) {
2835     for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
2836       for (size_t channels = 1; channels < 4; channels++) {
2837         ArgMaxPoolMicrokernelTester()
2838           .pooling_elements(17)
2839           .pooling_tile(9, 8)
2840           .channels(channels)
2841           .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
2842       }
2843     }
2844   }
2845 
// Same sweep as channels_lt_4_multipass (pooling sizes 18..33 step 3,
// channels 1..3) with an input offset of 4.
// The swept pooling size must be forwarded to the tester; passing a fixed 17
// would re-run the same case on every outer iteration.
// NOTE(review): this file is marked auto-generated; mirror this in the
// generator template as well.
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_lt_4_multipass_with_input_offset) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(4)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
    }
  }
}
2858 
// Sweeps pooling sizes 18, 21, ..., 33 (all larger than the 9 + 8 pooling
// tile) against channel counts 5..7.
// The swept pooling size must be forwarded to the tester; passing a fixed 17
// would re-run the same case on every outer iteration.
// NOTE(review): this file is marked auto-generated; mirror this in the
// generator template as well.
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_gt_4_multipass) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
    }
  }
}
2870 
// Same sweep as channels_gt_4_multipass (pooling sizes 18..33 step 3,
// channels 5..7) with an input offset of 11.
// The swept pooling size must be forwarded to the tester; passing a fixed 17
// would re-run the same case on every outer iteration.
// NOTE(review): this file is marked auto-generated; mirror this in the
// generator template as well.
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, channels_gt_4_multipass_with_input_offset) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4);
    }
  }
}
2883 
// Runs the kernel for 2..5 output pixels, pooling sizes 10..17 and channel
// counts 1, 4, 7, ..., 19.
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, few_output_pixels) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 21; c += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .Test(ukernel);
      }
    }
  }
}
2898 
// Runs the kernel for 2..5 output pixels, pooling sizes 10..17 and channel
// counts 1, 4, 7, ..., 19, with an input offset of 23.
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, few_output_pixels_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 21; c += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .input_offset(23)
          .Test(ukernel);
      }
    }
  }
}
2914 
// Runs the kernel for 2..5 output pixels, pooling sizes 10..17 and channel
// counts 1, 4, 7, ..., 19, with an output stride of 23.
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, few_output_pixels_with_output_stride) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 21; c += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .output_stride(23)
          .Test(ukernel);
      }
    }
  }
}
2930 
// Runs the kernel for 2..5 output pixels, pooling sizes 10..17, channel
// counts 1, 4, 7, ..., 19 and every step from 2 up to the pooling size.
// An output stride of 23 is also applied in every case.
TEST(F32_ARGMAXPOOL_9P8X__WASMSIMD_C4, few_output_pixels_with_step) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 21; c += 3) {
        for (size_t stride = 2; stride <= kernel_size; ++stride) {
          ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels)
            .pooling_elements(kernel_size)
            .pooling_tile(9, 8)
            .step(stride)
            .channels(c)
            .output_stride(23)
            .Test(ukernel);
        }
      }
    }
  }
}
2949 #endif  // XNN_ARCH_WASMSIMD
2950 
2951 
2952 
// Single case: 17 pooling elements (the full 9 + 8 tile) with one channel,
// checked with the tester's Scalar variant.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_twopass_fulltile) {
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(17)
    .pooling_tile(9, 8)
    .channels(1)
    .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, variant);
}
2960 
// Single case: 17 pooling elements (the full 9 + 8 tile), one channel and an
// input offset of 3, checked with the tester's Scalar variant.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_twopass_fulltile_with_input_offset) {
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(17)
    .pooling_tile(9, 8)
    .channels(1)
    .input_offset(3)
    .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, variant);
}
2969 
// Pooling sizes 10..16 (below the full 9 + 8 tile) with a single channel.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_twopass_subtile) {
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(1)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, variant);
  }
}
2979 
// Pooling sizes 10..16 (below the full 9 + 8 tile) with a single channel and
// an input offset of 3.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_twopass_subtile_with_input_offset) {
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(1)
      .input_offset(3)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, variant);
  }
}
2990 
// Full 17-element pooling window with channel counts 2..9.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_twopass_fulltile) {
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t c = 2; c <= 9; ++c) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(c)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, variant);
  }
}
3000 
// Full 17-element pooling window with channel counts 2..9 and an input
// offset of 3.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_twopass_fulltile_with_input_offset) {
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t c = 2; c <= 9; ++c) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(c)
      .input_offset(3)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, variant);
  }
}
3011 
// Sweeps pooling sizes 10..16 (below the full 9 + 8 tile) against channel
// counts 2..9.
// The swept pooling size must be forwarded to the tester, matching the
// channels_eq_1 subtile test; a fixed 17 would only repeat the full-tile case.
// NOTE(review): this file is marked auto-generated; mirror this in the
// generator template as well.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_twopass_subtile) {
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 2; channels < 10; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3023 
// Sweeps pooling sizes 10..16 (below the full 9 + 8 tile) against channel
// counts 2..9, with an input offset of 3.
// The swept pooling size must be forwarded to the tester, matching the
// channels_eq_1 subtile test; a fixed 17 would only repeat the full-tile case.
// NOTE(review): this file is marked auto-generated; mirror this in the
// generator template as well.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_twopass_subtile_with_input_offset) {
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 2; channels < 10; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(3)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3036 
// Sweeps pooling sizes 18, 21, ..., 33 (all larger than the 9 + 8 pooling
// tile) with a single channel.
// The swept pooling size must be forwarded to the tester; passing a fixed 17
// would re-run the same case on every iteration.
// NOTE(review): this file is marked auto-generated; mirror this in the
// generator template as well.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_multipass) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(1)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
3046 
// Sweeps pooling sizes 18, 21, ..., 33 (all larger than the 9 + 8 pooling
// tile) with a single channel and an input offset of 3.
// The swept pooling size must be forwarded to the tester; passing a fixed 17
// would re-run the same case on every iteration.
// NOTE(review): this file is marked auto-generated; mirror this in the
// generator template as well.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_multipass_with_input_offset) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(1)
      .input_offset(3)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
3057 
// Sweeps pooling sizes 18, 21, ..., 33 (all larger than the 9 + 8 pooling
// tile) against channel counts 2..9.
// The swept pooling size must be forwarded to the tester; passing a fixed 17
// would re-run the same case on every outer iteration.
// NOTE(review): this file is marked auto-generated; mirror this in the
// generator template as well.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_multipass) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 2; channels < 10; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3069 
// Sweeps pooling sizes 18, 21, ..., 33 (all larger than the 9 + 8 pooling
// tile) against channel counts 2..9, with an input offset of 3.
// The swept pooling size must be forwarded to the tester; passing a fixed 17
// would re-run the same case on every outer iteration.
// NOTE(review): this file is marked auto-generated; mirror this in the
// generator template as well.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_multipass_with_input_offset) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 2; channels < 10; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(3)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3082 
// Runs the scalar kernel for 2..5 output pixels, pooling sizes 10..17 and
// channel counts 1..5.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, few_output_pixels) {
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 6; ++c) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, variant);
      }
    }
  }
}
3097 
// Runs the scalar kernel for 2..5 output pixels, pooling sizes 10..17 and
// channel counts 1..5, with an input offset of 7.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, few_output_pixels_with_input_offset) {
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 6; ++c) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .input_offset(7)
          .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, variant);
      }
    }
  }
}
3113 
// Runs the scalar kernel for 2..5 output pixels, pooling sizes 10..17 and
// channel counts 1..5, with an output stride of 7.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, few_output_pixels_with_output_stride) {
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 6; ++c) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .output_stride(7)
          .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, variant);
      }
    }
  }
}
3129 
// Runs the scalar kernel for 2..5 output pixels, pooling sizes 10..17,
// channel counts 1..5 and every step from 2 up to the pooling size.
// An output stride of 7 is also applied in every case.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, few_output_pixels_with_step) {
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 6; ++c) {
        for (size_t stride = 2; stride <= kernel_size; ++stride) {
          ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels)
            .pooling_elements(kernel_size)
            .pooling_tile(9, 8)
            .step(stride)
            .channels(c)
            .output_stride(7)
            .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, variant);
        }
      }
    }
  }
}