• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/f32-argmaxpool.yaml
8 //   Generator: tools/generate-argmaxpool-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/argmaxpool.h>
17 #include "argmaxpool-microkernel-tester.h"
18 
19 
20 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Pooling window fills the whole tile (4 of 4 elements) with exactly 4 channels.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_eq_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(4)
    .pooling_tile(4)
    .channels(4)
    .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
}
29 
// Full tile (4 of 4 elements), exactly 4 channels, with input_offset set to 7.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_eq_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(4)
    .pooling_tile(4)
    .channels(4)
    .input_offset(7)
    .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
}
39 
// Full tile (4 of 4 elements), exactly 4 channels, with qmin set to 192.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_eq_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(4)
    .pooling_tile(4)
    .channels(4)
    .qmin(192)
    .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
}
49 
// Full tile (4 of 4 elements), exactly 4 channels, with qmax set to 192.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_eq_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(4)
    .pooling_tile(4)
    .channels(4)
    .qmax(192)
    .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
}
59 
// Pooling windows smaller than the tile (2 and 3 of 4 elements) with exactly 4 channels.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_eq_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(4)
      .channels(4)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
70 
// Sub-tile pooling windows (2 and 3 of 4 elements), exactly 4 channels, input_offset set to 7.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_eq_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(4)
      .channels(4)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
82 
// Full tile (4 of 4 elements) over channel counts 8, 12, ..., 28 (multiples of 4).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_div_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 8; channels < 32; channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
93 
// Full tile over channel counts 8, 12, ..., 28, with input_offset set to 37.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_div_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 8; channels < 32; channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .input_offset(37)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
105 
// Full tile over channel counts 8, 12, ..., 28, with qmin set to 192.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_div_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 8; channels < 32; channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
117 
// Full tile over channel counts 8, 12, ..., 28, with qmax set to 192.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_div_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 8; channels < 32; channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
129 
// Sub-tile pooling windows (2 and 3 of 4 elements) over channel counts 8, 12, ..., 28.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_div_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
    }
  }
}
142 
// Sub-tile pooling windows over channel counts 8, 12, ..., 28, with input_offset set to 37.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_div_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
    }
  }
}
156 
// Full tile (4 of 4 elements) over channel counts 1 through 3 (fewer than 4).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_lt_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels < 4; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
167 
// Full tile over channel counts 1 through 3, with input_offset set to 5.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_lt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels < 4; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .input_offset(5)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
179 
// Full tile over channel counts 1 through 3, with qmin set to 192.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_lt_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels < 4; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
191 
// Full tile over channel counts 1 through 3, with qmax set to 192.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_lt_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 1; channels < 4; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
203 
// Sub-tile pooling windows (2 and 3 of 4 elements) over channel counts 1 through 3.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_lt_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
    }
  }
}
216 
// Sub-tile pooling windows over channel counts 1 through 3, with input_offset set to 5.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_lt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
    }
  }
}
230 
// Full tile (4 of 4 elements) over channel counts 5 through 7 (above 4, not multiples of 4).
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_gt_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 5; channels < 8; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
241 
// Full tile over channel counts 5 through 7, with input_offset set to 11.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_gt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 5; channels < 8; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .input_offset(11)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
253 
// Full tile over channel counts 5 through 7, with qmin set to 192.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_gt_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 5; channels < 8; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
265 
// Full tile over channel counts 5 through 7, with qmax set to 192.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_gt_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channels = 5; channels < 8; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
  }
}
277 
// Sub-tile pooling windows (2 and 3 of 4 elements) over channel counts 5 through 7.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_gt_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
    }
  }
}
290 
// Sub-tile pooling windows over channel counts 5 through 7, with input_offset set to 11.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, channels_gt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
    }
  }
}
304 
// Sweeps 2-5 output pixels, pooling windows of 2-4 elements, and channel counts 1, 4, ..., 19.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, few_output_pixels) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements = 2; pooling_elements <= 4; pooling_elements++) {
      for (size_t channels = 1; channels <= 20; channels += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(4)
          .channels(channels)
          .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
      }
    }
  }
}
320 
// Same sweep as few_output_pixels (2-5 pixels, 2-4 elements, channels 1, 4, ..., 19) with input_offset set to 23.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements = 2; pooling_elements <= 4; pooling_elements++) {
      for (size_t channels = 1; channels <= 20; channels += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(4)
          .channels(channels)
          .input_offset(23)
          .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
      }
    }
  }
}
337 
// Sweeps 2-5 output pixels, 2-4 pooling elements, channels 1, 4, ..., 19, with qmin set to 192.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, few_output_pixels_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements = 2; pooling_elements <= 4; pooling_elements++) {
      for (size_t channels = 1; channels <= 20; channels += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(4)
          .channels(channels)
          .qmin(192)
          .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
      }
    }
  }
}
354 
// Sweeps 2-5 output pixels, 2-4 pooling elements, channels 1, 4, ..., 19, with qmax set to 192.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, few_output_pixels_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements = 2; pooling_elements <= 4; pooling_elements++) {
      for (size_t channels = 1; channels <= 20; channels += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(4)
          .channels(channels)
          .qmax(192)
          .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
      }
    }
  }
}
371 
// Sweeps 2-5 output pixels, 2-4 pooling elements, channels 1, 4, ..., 19, with output_stride set to 23.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements = 2; pooling_elements <= 4; pooling_elements++) {
      for (size_t channels = 1; channels <= 20; channels += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(4)
          .channels(channels)
          .output_stride(23)
          .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
      }
    }
  }
}
388 
// Sweeps 2-5 output pixels, 2-4 pooling elements, channels 1, 4, ..., 19, and every step from 2
// up to the current pooling_elements. output_stride is also set to 23 in this test.
TEST(F32_ARGMAXPOOL_4X__SSE2_C4, few_output_pixels_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements = 2; pooling_elements <= 4; pooling_elements++) {
      for (size_t channels = 1; channels <= 20; channels += 3) {
        for (size_t step = 2; step <= pooling_elements; step++) {
          ArgMaxPoolMicrokernelTester()
            .output_pixels(output_pixels)
            .pooling_elements(pooling_elements)
            .pooling_tile(4)
            .step(step)
            .channels(channels)
            .output_stride(23)
            .Test(xnn_f32_argmaxpool_ukernel_4x__sse2_c4);
        }
      }
    }
  }
}
408 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
409 
410 
411 #if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
// Pooling window fills the whole tile (4 of 4 elements) with exactly 4 channels.
// The kernel is run with the tester's Scalar variant.
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_eq_4_unipass_fulltile) {
  TEST_REQUIRES_PSIMD;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(4)
    .pooling_tile(4)
    .channels(4)
    .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
420 
// Full tile (4 of 4 elements), exactly 4 channels, with input_offset set to 7 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_eq_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(4)
    .pooling_tile(4)
    .channels(4)
    .input_offset(7)
    .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
430 
// Full tile (4 of 4 elements), exactly 4 channels, with qmin set to 192 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_eq_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(4)
    .pooling_tile(4)
    .channels(4)
    .qmin(192)
    .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
440 
// Full tile (4 of 4 elements), exactly 4 channels, with qmax set to 192 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_eq_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(4)
    .pooling_tile(4)
    .channels(4)
    .qmax(192)
    .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
450 
// Pooling windows smaller than the tile (2 and 3 of 4 elements) with exactly 4 channels (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_eq_4_unipass_subtile) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(4)
      .channels(4)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
461 
// Sub-tile pooling windows (2 and 3 of 4 elements), exactly 4 channels, input_offset set to 7 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_eq_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(4)
      .channels(4)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
473 
// Full tile (4 of 4 elements) over channel counts 8, 12, ..., 28 (multiples of 4), Scalar variant.
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_div_4_unipass_fulltile) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 8; channels < 32; channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
484 
// Full tile over channel counts 8, 12, ..., 28, with input_offset set to 37 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_div_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 8; channels < 32; channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .input_offset(37)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
496 
// Full tile over channel counts 8, 12, ..., 28, with qmin set to 192 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_div_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 8; channels < 32; channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
508 
// Full tile over channel counts 8, 12, ..., 28, with qmax set to 192 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_div_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 8; channels < 32; channels += 4) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
520 
// Sub-tile pooling windows (2 and 3 of 4 elements) over channel counts 8, 12, ..., 28 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_div_4_unipass_subtile) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
533 
// Sub-tile pooling windows over channel counts 8, 12, ..., 28, with input_offset set to 37 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_div_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
547 
// Full tile (4 of 4 elements) over channel counts 1 through 3 (fewer than 4), Scalar variant.
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_lt_4_unipass_fulltile) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 1; channels < 4; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
558 
// Full tile over channel counts 1 through 3, with input_offset set to 5 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_lt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 1; channels < 4; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .input_offset(5)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
570 
// Full tile over channel counts 1 through 3, with qmin set to 192 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_lt_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 1; channels < 4; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
582 
// Full tile over channel counts 1 through 3, with qmax set to 192 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_lt_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 1; channels < 4; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
594 
// Sub-tile pooling windows (2 and 3 of 4 elements) over channel counts 1 through 3 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_lt_4_unipass_subtile) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
607 
// Sub-tile pooling windows over channel counts 1 through 3, with input_offset set to 5 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_lt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
621 
// Full tile (4 of 4 elements) over channel counts 5 through 7 (above 4, not multiples of 4), Scalar variant.
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_gt_4_unipass_fulltile) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 5; channels < 8; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
632 
// Full tile over channel counts 5 through 7, with input_offset set to 11 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_gt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 5; channels < 8; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .input_offset(11)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
644 
// Full tile over channel counts 5 through 7, with qmin set to 192 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_gt_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 5; channels < 8; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
656 
// Full tile over channel counts 5 through 7, with qmax set to 192 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_gt_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  for (size_t channels = 5; channels < 8; channels++) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(4)
      .pooling_tile(4)
      .channels(channels)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
668 
// Sub-tile pooling windows (2 and 3 of 4 elements) over channel counts 5 through 7 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_gt_4_unipass_subtile) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
681 
// Sub-tile pooling windows over channel counts 5 through 7, with input_offset set to 11 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, channels_gt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 2; pooling_elements < 4; pooling_elements++) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(4)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
695 
// Sweeps 2-5 output pixels, pooling windows of 2-4 elements, and channel counts 1, 4, ..., 19 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, few_output_pixels) {
  TEST_REQUIRES_PSIMD;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements = 2; pooling_elements <= 4; pooling_elements++) {
      for (size_t channels = 1; channels <= 20; channels += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(4)
          .channels(channels)
          .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
711 
// Sweeps 2-5 output pixels, 2-4 pooling elements, channels 1, 4, ..., 19, with input_offset set to 23 (Scalar variant).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements = 2; pooling_elements <= 4; pooling_elements++) {
      for (size_t channels = 1; channels <= 20; channels += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(4)
          .channels(channels)
          .input_offset(23)
          .Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
728 
// Sweeps 2-5 output pixels, 2-4 pooling elements and channel counts 1, 4, ..., 19
// with the tester's qmin set to 192.
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, few_output_pixels_with_qmin) {
  TEST_REQUIRES_PSIMD;
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 4; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(4);
        tester.channels(channel_count);
        tester.qmin(192);
        tester.Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
745 
// Sweeps 2-5 output pixels, 2-4 pooling elements and channel counts 1, 4, ..., 19
// with the tester's qmax set to 192.
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, few_output_pixels_with_qmax) {
  TEST_REQUIRES_PSIMD;
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 4; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(4);
        tester.channels(channel_count);
        tester.qmax(192);
        tester.Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
762 
// Sweeps 2-5 output pixels, 2-4 pooling elements and channel counts 1, 4, ..., 19
// with the tester's output_stride set to 23 (larger than any channel count used).
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_PSIMD;
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 4; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(4);
        tester.channels(channel_count);
        tester.output_stride(23);
        tester.Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
779 
// Sweeps 2-5 output pixels, 2-4 pooling elements, channel counts 1, 4, ..., 19 and
// every step from 2 up to the current pooling element count, with output_stride 23.
TEST(F32_ARGMAXPOOL_4X__PSIMD_C4, few_output_pixels_with_step) {
  TEST_REQUIRES_PSIMD;
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 4; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
        // The step never exceeds the number of pooling elements.
        for (size_t step_size = 2; step_size <= kernel_elements; ++step_size) {
          auto tester = ArgMaxPoolMicrokernelTester();
          tester.output_pixels(pixel_count);
          tester.pooling_elements(kernel_elements);
          tester.pooling_tile(4);
          tester.step(step_size);
          tester.channels(channel_count);
          tester.output_stride(23);
          tester.Test(xnn_f32_argmaxpool_ukernel_4x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
        }
      }
    }
  }
}
799 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
800 
801 
// Single channel, 4 pooling elements on a pooling tile of 4, 4x scalar c1 kernel.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_eq_1_unipass_fulltile) {
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(4);
  tester.pooling_tile(4);
  tester.channels(1);
  tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
809 
// Single channel, 4 pooling elements on a pooling tile of 4, with input_offset set to 3.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_eq_1_unipass_fulltile_with_input_offset) {
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(4);
  tester.pooling_tile(4);
  tester.channels(1);
  tester.input_offset(3);
  tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
818 
// Single channel, 4 pooling elements on a pooling tile of 4, with qmin set to 192.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_eq_1_unipass_fulltile_with_qmin) {
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(4);
  tester.pooling_tile(4);
  tester.channels(1);
  tester.qmin(192);
  tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
827 
// Single channel, 4 pooling elements on a pooling tile of 4, with qmax set to 192.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_eq_1_unipass_fulltile_with_qmax) {
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(4);
  tester.pooling_tile(4);
  tester.channels(1);
  tester.qmax(192);
  tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
836 
// Single channel with fewer pooling elements (2 and 3) than the pooling tile of 4.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_eq_1_unipass_subtile) {
  for (size_t kernel_elements = 2; kernel_elements < 4; ++kernel_elements) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(kernel_elements);
    tester.pooling_tile(4);
    tester.channels(1);
    tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
846 
// Single channel with 2 and 3 pooling elements on a pooling tile of 4, input_offset set to 3.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_eq_1_unipass_subtile_with_input_offset) {
  for (size_t kernel_elements = 2; kernel_elements < 4; ++kernel_elements) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(kernel_elements);
    tester.pooling_tile(4);
    tester.channels(1);
    tester.input_offset(3);
    tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
857 
// Channel counts 2-9 with 4 pooling elements on a pooling tile of 4.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_gt_1_unipass_fulltile) {
  for (size_t channel_count = 2; channel_count < 10; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(4);
    tester.pooling_tile(4);
    tester.channels(channel_count);
    tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
867 
// Channel counts 2-9 with 4 pooling elements on a pooling tile of 4, input_offset set to 3.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_gt_1_unipass_fulltile_with_input_offset) {
  for (size_t channel_count = 2; channel_count < 10; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(4);
    tester.pooling_tile(4);
    tester.channels(channel_count);
    tester.input_offset(3);
    tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
878 
// Channel counts 2-9 with 4 pooling elements on a pooling tile of 4, qmin set to 192.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_gt_1_unipass_fulltile_with_qmin) {
  for (size_t channel_count = 2; channel_count < 10; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(4);
    tester.pooling_tile(4);
    tester.channels(channel_count);
    tester.qmin(192);
    tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
889 
// Channel counts 2-9 with 4 pooling elements on a pooling tile of 4, qmax set to 192.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_gt_1_unipass_fulltile_with_qmax) {
  for (size_t channel_count = 2; channel_count < 10; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(4);
    tester.pooling_tile(4);
    tester.channels(channel_count);
    tester.qmax(192);
    tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
900 
// Channel counts 2-9 combined with 2 and 3 pooling elements on a pooling tile of 4.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_gt_1_unipass_subtile) {
  for (size_t kernel_elements = 2; kernel_elements < 4; ++kernel_elements) {
    for (size_t channel_count = 2; channel_count < 10; ++channel_count) {
      auto tester = ArgMaxPoolMicrokernelTester();
      tester.pooling_elements(kernel_elements);
      tester.pooling_tile(4);
      tester.channels(channel_count);
      tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
912 
// Channel counts 2-9 combined with 2 and 3 pooling elements on a pooling tile of 4,
// input_offset set to 3.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, channels_gt_1_unipass_subtile_with_input_offset) {
  for (size_t kernel_elements = 2; kernel_elements < 4; ++kernel_elements) {
    for (size_t channel_count = 2; channel_count < 10; ++channel_count) {
      auto tester = ArgMaxPoolMicrokernelTester();
      tester.pooling_elements(kernel_elements);
      tester.pooling_tile(4);
      tester.channels(channel_count);
      tester.input_offset(3);
      tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
925 
// Sweeps 2-5 output pixels, 2-4 pooling elements and 1-5 channels through the
// 4x scalar c1 kernel with a pooling tile of 4.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, few_output_pixels) {
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 4; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(4);
        tester.channels(channel_count);
        tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
940 
// Sweeps 2-5 output pixels, 2-4 pooling elements and 1-5 channels with input_offset set to 7.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, few_output_pixels_with_input_offset) {
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 4; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(4);
        tester.channels(channel_count);
        tester.input_offset(7);
        tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
956 
// Sweeps 2-5 output pixels, 2-4 pooling elements and 1-5 channels with qmin set to 192.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, few_output_pixels_with_qmin) {
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 4; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(4);
        tester.channels(channel_count);
        tester.qmin(192);
        tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
972 
// Sweeps 2-5 output pixels, 2-4 pooling elements and 1-5 channels with qmax set to 192.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, few_output_pixels_with_qmax) {
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 4; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(4);
        tester.channels(channel_count);
        tester.qmax(192);
        tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
988 
// Sweeps 2-5 output pixels, 2-4 pooling elements and 1-5 channels with output_stride
// set to 7 (larger than any channel count used).
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, few_output_pixels_with_output_stride) {
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 4; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(4);
        tester.channels(channel_count);
        tester.output_stride(7);
        tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
1004 
// Sweeps 2-5 output pixels, 2-4 pooling elements, 1-5 channels and every step from 2
// up to the current pooling element count, with output_stride set to 7.
TEST(F32_ARGMAXPOOL_4X__SCALAR_C1, few_output_pixels_with_step) {
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 4; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 5; ++channel_count) {
        // The step never exceeds the number of pooling elements.
        for (size_t step_size = 2; step_size <= kernel_elements; ++step_size) {
          auto tester = ArgMaxPoolMicrokernelTester();
          tester.output_pixels(pixel_count);
          tester.pooling_elements(kernel_elements);
          tester.pooling_tile(4);
          tester.step(step_size);
          tester.channels(channel_count);
          tester.output_stride(7);
          tester.Test(xnn_f32_argmaxpool_ukernel_4x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
        }
      }
    }
  }
}
1023 
1024 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4 channels, 9 pooling elements on a pooling tile of 9, 9x SSE2 c4 kernel.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_eq_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(9);
  tester.pooling_tile(9);
  tester.channels(4);
  tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
}
1033 
// 4 channels, 9 pooling elements on a pooling tile of 9, with input_offset set to 7.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_eq_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(9);
  tester.pooling_tile(9);
  tester.channels(4);
  tester.input_offset(7);
  tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
}
1043 
// 4 channels, 9 pooling elements on a pooling tile of 9, with qmin set to 192.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_eq_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(9);
  tester.pooling_tile(9);
  tester.channels(4);
  tester.qmin(192);
  tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
}
1053 
// 4 channels, 9 pooling elements on a pooling tile of 9, with qmax set to 192.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_eq_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(9);
  tester.pooling_tile(9);
  tester.channels(4);
  tester.qmax(192);
  tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
}
1063 
// 4 channels with fewer pooling elements (2-8) than the pooling tile of 9.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_eq_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kernel_elements = 2; kernel_elements < 9; ++kernel_elements) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(kernel_elements);
    tester.pooling_tile(9);
    tester.channels(4);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1074 
// 4 channels with 2-8 pooling elements on a pooling tile of 9, input_offset set to 7.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_eq_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kernel_elements = 2; kernel_elements < 9; ++kernel_elements) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(kernel_elements);
    tester.pooling_tile(9);
    tester.channels(4);
    tester.input_offset(7);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1086 
// Channel counts 8, 12, ..., 28 (multiples of 4) with 9 pooling elements on a pooling tile of 9.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_div_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 8; channel_count < 32; channel_count += 4) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1097 
// Channel counts 8, 12, ..., 28 with 9 pooling elements on a pooling tile of 9,
// input_offset set to 37.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_div_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 8; channel_count < 32; channel_count += 4) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.input_offset(37);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1109 
// Channel counts 8, 12, ..., 28 with 9 pooling elements on a pooling tile of 9, qmin set to 192.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_div_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 8; channel_count < 32; channel_count += 4) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.qmin(192);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1121 
// Channel counts 8, 12, ..., 28 with 9 pooling elements on a pooling tile of 9, qmax set to 192.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_div_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 8; channel_count < 32; channel_count += 4) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.qmax(192);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1133 
// Channel counts 8, 12, ..., 28 combined with 2-8 pooling elements on a pooling tile of 9.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_div_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kernel_elements = 2; kernel_elements < 9; ++kernel_elements) {
    for (size_t channel_count = 8; channel_count < 32; channel_count += 4) {
      auto tester = ArgMaxPoolMicrokernelTester();
      tester.pooling_elements(kernel_elements);
      tester.pooling_tile(9);
      tester.channels(channel_count);
      tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1146 
// Channel counts 8, 12, ..., 28 combined with 2-8 pooling elements on a pooling tile of 9,
// input_offset set to 37.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_div_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kernel_elements = 2; kernel_elements < 9; ++kernel_elements) {
    for (size_t channel_count = 8; channel_count < 32; channel_count += 4) {
      auto tester = ArgMaxPoolMicrokernelTester();
      tester.pooling_elements(kernel_elements);
      tester.pooling_tile(9);
      tester.channels(channel_count);
      tester.input_offset(37);
      tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1160 
// Channel counts 1-3 (below the c4 channel tile) with 9 pooling elements on a pooling tile of 9.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_lt_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 1; channel_count < 4; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1171 
// Channel counts 1-3 with 9 pooling elements on a pooling tile of 9, input_offset set to 5.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_lt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 1; channel_count < 4; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.input_offset(5);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1183 
// Channel counts 1-3 with 9 pooling elements on a pooling tile of 9, qmin set to 192.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_lt_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 1; channel_count < 4; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.qmin(192);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1195 
// Channel counts 1-3 with 9 pooling elements on a pooling tile of 9, qmax set to 192.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_lt_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 1; channel_count < 4; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.qmax(192);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1207 
// Channel counts 1-3 combined with 2-8 pooling elements on a pooling tile of 9.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_lt_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kernel_elements = 2; kernel_elements < 9; ++kernel_elements) {
    for (size_t channel_count = 1; channel_count < 4; ++channel_count) {
      auto tester = ArgMaxPoolMicrokernelTester();
      tester.pooling_elements(kernel_elements);
      tester.pooling_tile(9);
      tester.channels(channel_count);
      tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1220 
// Channel counts 1-3 combined with 2-8 pooling elements on a pooling tile of 9,
// input_offset set to 5.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_lt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kernel_elements = 2; kernel_elements < 9; ++kernel_elements) {
    for (size_t channel_count = 1; channel_count < 4; ++channel_count) {
      auto tester = ArgMaxPoolMicrokernelTester();
      tester.pooling_elements(kernel_elements);
      tester.pooling_tile(9);
      tester.channels(channel_count);
      tester.input_offset(5);
      tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1234 
// Channel counts 5-7 with 9 pooling elements on a pooling tile of 9.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_gt_4_unipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1245 
// Channel counts 5-7 with 9 pooling elements on a pooling tile of 9, input_offset set to 11.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_gt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.input_offset(11);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1257 
// Channel counts 5-7 with 9 pooling elements on a pooling tile of 9, qmin set to 192.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_gt_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.qmin(192);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1269 
// Channel counts 5-7 with 9 pooling elements on a pooling tile of 9, qmax set to 192.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_gt_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t channel_count = 5; channel_count < 8; ++channel_count) {
    auto tester = ArgMaxPoolMicrokernelTester();
    tester.pooling_elements(9);
    tester.pooling_tile(9);
    tester.channels(channel_count);
    tester.qmax(192);
    tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
  }
}
1281 
// Channel counts 5-7 combined with 2-8 pooling elements on a pooling tile of 9.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_gt_4_unipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kernel_elements = 2; kernel_elements < 9; ++kernel_elements) {
    for (size_t channel_count = 5; channel_count < 8; ++channel_count) {
      auto tester = ArgMaxPoolMicrokernelTester();
      tester.pooling_elements(kernel_elements);
      tester.pooling_tile(9);
      tester.channels(channel_count);
      tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1294 
// Channel counts 5-7 combined with 2-8 pooling elements on a pooling tile of 9,
// input_offset set to 11.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, channels_gt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kernel_elements = 2; kernel_elements < 9; ++kernel_elements) {
    for (size_t channel_count = 5; channel_count < 8; ++channel_count) {
      auto tester = ArgMaxPoolMicrokernelTester();
      tester.pooling_elements(kernel_elements);
      tester.pooling_tile(9);
      tester.channels(channel_count);
      tester.input_offset(11);
      tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
    }
  }
}
1308 
// Sweeps 2-5 output pixels, 2-9 pooling elements and channel counts 1, 4, ..., 19
// through the 9x SSE2 c4 kernel with a pooling tile of 9.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, few_output_pixels) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 9; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(9);
        tester.channels(channel_count);
        tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
      }
    }
  }
}
1324 
// Sweeps 2-5 output pixels, 2-9 pooling elements and channel counts 1, 4, ..., 19
// with input_offset set to 23.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 9; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(9);
        tester.channels(channel_count);
        tester.input_offset(23);
        tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
      }
    }
  }
}
1341 
// Sweeps 2-5 output pixels, 2-9 pooling elements and channel counts 1, 4, ..., 19
// with qmin set to 192.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, few_output_pixels_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 9; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(9);
        tester.channels(channel_count);
        tester.qmin(192);
        tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
      }
    }
  }
}
1358 
// Sweeps 2-5 output pixels, 2-9 pooling elements and channel counts 1, 4, ..., 19
// with qmax set to 192.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, few_output_pixels_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 9; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(9);
        tester.channels(channel_count);
        tester.qmax(192);
        tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
      }
    }
  }
}
1375 
// Sweeps 2-5 output pixels, 2-9 pooling elements and channel counts 1, 4, ..., 19
// with output_stride set to 23 (larger than any channel count used).
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 9; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixel_count);
        tester.pooling_elements(kernel_elements);
        tester.pooling_tile(9);
        tester.channels(channel_count);
        tester.output_stride(23);
        tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
      }
    }
  }
}
1392 
// Sweeps 2-5 output pixels, 2-9 pooling elements, channel counts 1, 4, ..., 19 and
// every step from 2 up to the current pooling element count, with output_stride 23.
TEST(F32_ARGMAXPOOL_9X__SSE2_C4, few_output_pixels_with_step) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t pixel_count = 2; pixel_count <= 5; ++pixel_count) {
    for (size_t kernel_elements = 2; kernel_elements <= 9; ++kernel_elements) {
      for (size_t channel_count = 1; channel_count <= 20; channel_count += 3) {
        // The step never exceeds the number of pooling elements.
        for (size_t step_size = 2; step_size <= kernel_elements; ++step_size) {
          auto tester = ArgMaxPoolMicrokernelTester();
          tester.output_pixels(pixel_count);
          tester.pooling_elements(kernel_elements);
          tester.pooling_tile(9);
          tester.step(step_size);
          tester.channels(channel_count);
          tester.output_stride(23);
          tester.Test(xnn_f32_argmaxpool_ukernel_9x__sse2_c4);
        }
      }
    }
  }
}
1412 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1413 
1414 
1415 #if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
// 4 channels, 9 pooling elements on a pooling tile of 9, 9x PSIMD c4 kernel.
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_eq_4_unipass_fulltile) {
  TEST_REQUIRES_PSIMD;
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(9);
  tester.pooling_tile(9);
  tester.channels(4);
  tester.Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
1424 
// 4 channels, 9 pooling elements on a pooling tile of 9, with input_offset set to 7.
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_eq_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(9);
  tester.pooling_tile(9);
  tester.channels(4);
  tester.input_offset(7);
  tester.Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
1434 
// 4 channels, 9 pooling elements on a pooling tile of 9, with qmin set to 192.
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_eq_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  auto tester = ArgMaxPoolMicrokernelTester();
  tester.pooling_elements(9);
  tester.pooling_tile(9);
  tester.channels(4);
  tester.qmin(192);
  tester.Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
1444 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_eq_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Single configuration: 9 pooling elements, pooling tile of 9, 4 channels, qmax set to 192.
  ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(4)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
}
1454 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_eq_4_unipass_subtile) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements (fewer than the pooling tile of 9) with 4 channels.
  for (size_t window = 2; window < 9; ++window) {
    ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(4)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1465 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_eq_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements (fewer than the pooling tile of 9) with 4 channels
  // and input offset 7.
  for (size_t window = 2; window < 9; ++window) {
    ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(4)
        .input_offset(7)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1477 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_div_4_unipass_fulltile) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 8, 12, ..., 28 with 9 pooling elements and a pooling tile of 9.
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1488 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_div_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 8, 12, ..., 28 with 9 pooling elements, a pooling tile of 9,
  // and input offset 37.
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1500 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_div_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 8, 12, ..., 28 with 9 pooling elements, a pooling tile of 9,
  // and qmin set to 192.
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1512 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_div_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 8, 12, ..., 28 with 9 pooling elements, a pooling tile of 9,
  // and qmax set to 192.
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1524 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_div_4_unipass_subtile) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements against channel counts 8, 12, ..., 28 (pooling tile of 9).
  for (size_t window = 2; window < 9; ++window) {
    for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
    }
  }
}
1537 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_div_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements against channel counts 8, 12, ..., 28 (pooling tile of 9),
  // with input offset 37.
  for (size_t window = 2; window < 9; ++window) {
    for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9)
          .channels(num_channels)
          .input_offset(37)
          .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
    }
  }
}
1551 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_lt_4_unipass_fulltile) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 1..3 with 9 pooling elements and a pooling tile of 9.
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1562 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_lt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 1..3 with 9 pooling elements, a pooling tile of 9,
  // and input offset 5.
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1574 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_lt_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 1..3 with 9 pooling elements, a pooling tile of 9,
  // and qmin set to 192.
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1586 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_lt_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 1..3 with 9 pooling elements, a pooling tile of 9,
  // and qmax set to 192.
  for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1598 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_lt_4_unipass_subtile) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements against channel counts 1..3 (pooling tile of 9).
  for (size_t window = 2; window < 9; ++window) {
    for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
    }
  }
}
1611 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_lt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements against channel counts 1..3 (pooling tile of 9),
  // with input offset 5.
  for (size_t window = 2; window < 9; ++window) {
    for (size_t num_channels = 1; num_channels < 4; ++num_channels) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9)
          .channels(num_channels)
          .input_offset(5)
          .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
    }
  }
}
1625 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_gt_4_unipass_fulltile) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 5..7 with 9 pooling elements and a pooling tile of 9.
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1636 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_gt_4_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 5..7 with 9 pooling elements, a pooling tile of 9,
  // and input offset 11.
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1648 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_gt_4_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 5..7 with 9 pooling elements, a pooling tile of 9,
  // and qmin set to 192.
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1660 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_gt_4_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 5..7 with 9 pooling elements, a pooling tile of 9,
  // and qmax set to 192.
  for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
  }
}
1672 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_gt_4_unipass_subtile) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements against channel counts 5..7 (pooling tile of 9).
  for (size_t window = 2; window < 9; ++window) {
    for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
    }
  }
}
1685 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, channels_gt_4_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements against channel counts 5..7 (pooling tile of 9),
  // with input offset 11.
  for (size_t window = 2; window < 9; ++window) {
    for (size_t num_channels = 5; num_channels < 8; ++num_channels) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9)
          .channels(num_channels)
          .input_offset(11)
          .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
    }
  }
}
1699 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, few_output_pixels) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, and channel counts 1, 4, ..., 19.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
            .pooling_tile(9)
            .channels(num_channels)
            .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
      }
    }
  }
}
1715 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, and channel counts 1, 4, ..., 19,
  // with input offset 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
            .pooling_tile(9)
            .channels(num_channels)
            .input_offset(23)
            .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
      }
    }
  }
}
1732 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, few_output_pixels_with_qmin) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, and channel counts 1, 4, ..., 19,
  // with qmin set to 192.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
            .pooling_tile(9)
            .channels(num_channels)
            .qmin(192)
            .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
      }
    }
  }
}
1749 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, few_output_pixels_with_qmax) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, and channel counts 1, 4, ..., 19,
  // with qmax set to 192.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
            .pooling_tile(9)
            .channels(num_channels)
            .qmax(192)
            .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
      }
    }
  }
}
1766 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, and channel counts 1, 4, ..., 19,
  // with output stride 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
            .pooling_tile(9)
            .channels(num_channels)
            .output_stride(23)
            .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
      }
    }
  }
}
1783 
TEST(F32_ARGMAXPOOL_9X__PSIMD_C4, few_output_pixels_with_step) {
  TEST_REQUIRES_PSIMD;
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, channel counts 1, 4, ..., 19,
  // and every step from 2 up to the current pooling element count; output stride is 23.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 20; num_channels += 3) {
        for (size_t window_step = 2; window_step <= window; ++window_step) {
          ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
              .pooling_tile(9)
              .step(window_step)
              .channels(num_channels)
              .output_stride(23)
              .Test(xnn_f32_argmaxpool_ukernel_9x__psimd_c4, Variant::Scalar);
        }
      }
    }
  }
}
1803 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
1804 
1805 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_eq_1_unipass_fulltile) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Single configuration: 9 pooling elements, pooling tile of 9, 1 channel.
  ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(1)
      .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
}
1813 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_eq_1_unipass_fulltile_with_input_offset) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Single configuration: 9 pooling elements, pooling tile of 9, 1 channel, input offset 3.
  ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(1)
      .input_offset(3)
      .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
}
1822 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_eq_1_unipass_fulltile_with_qmin) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Single configuration: 9 pooling elements, pooling tile of 9, 1 channel, qmin set to 192.
  ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(1)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
}
1831 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_eq_1_unipass_fulltile_with_qmax) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Single configuration: 9 pooling elements, pooling tile of 9, 1 channel, qmax set to 192.
  ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(1)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
}
1840 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_eq_1_unipass_subtile) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements (fewer than the pooling tile of 9) with 1 channel.
  for (size_t window = 2; window < 9; ++window) {
    ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(1)
        .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
  }
}
1850 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_eq_1_unipass_subtile_with_input_offset) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements (fewer than the pooling tile of 9) with 1 channel
  // and input offset 3.
  for (size_t window = 2; window < 9; ++window) {
    ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(1)
        .input_offset(3)
        .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
  }
}
1861 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_gt_1_unipass_fulltile) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 2..9 with 9 pooling elements and a pooling tile of 9.
  for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
  }
}
1871 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_gt_1_unipass_fulltile_with_input_offset) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 2..9 with 9 pooling elements, a pooling tile of 9,
  // and input offset 3.
  for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .input_offset(3)
        .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
  }
}
1882 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_gt_1_unipass_fulltile_with_qmin) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 2..9 with 9 pooling elements, a pooling tile of 9,
  // and qmin set to 192.
  for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
  }
}
1893 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_gt_1_unipass_fulltile_with_qmax) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps channel counts 2..9 with 9 pooling elements, a pooling tile of 9,
  // and qmax set to 192.
  for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
    ArgMaxPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(num_channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
  }
}
1904 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_gt_1_unipass_subtile) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements against channel counts 2..9 (pooling tile of 9).
  for (size_t window = 2; window < 9; ++window) {
    for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9)
          .channels(num_channels)
          .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
    }
  }
}
1916 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, channels_gt_1_unipass_subtile_with_input_offset) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..8 pooling elements against channel counts 2..9 (pooling tile of 9),
  // with input offset 3.
  for (size_t window = 2; window < 9; ++window) {
    for (size_t num_channels = 2; num_channels < 10; ++num_channels) {
      ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9)
          .channels(num_channels)
          .input_offset(3)
          .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
    }
  }
}
1929 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, few_output_pixels) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, and channel counts 1..5.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
            .pooling_tile(9)
            .channels(num_channels)
            .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
      }
    }
  }
}
1944 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, few_output_pixels_with_input_offset) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, and channel counts 1..5,
  // with input offset 7.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
            .pooling_tile(9)
            .channels(num_channels)
            .input_offset(7)
            .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
      }
    }
  }
}
1960 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, few_output_pixels_with_qmin) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, and channel counts 1..5,
  // with qmin set to 192.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
            .pooling_tile(9)
            .channels(num_channels)
            .qmin(192)
            .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
      }
    }
  }
}
1976 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, few_output_pixels_with_qmax) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, and channel counts 1..5,
  // with qmax set to 192.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
            .pooling_tile(9)
            .channels(num_channels)
            .qmax(192)
            .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
      }
    }
  }
}
1992 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, few_output_pixels_with_output_stride) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, and channel counts 1..5,
  // with output stride 7.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
            .pooling_tile(9)
            .channels(num_channels)
            .output_stride(7)
            .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
      }
    }
  }
}
2008 
TEST(F32_ARGMAXPOOL_9X__SCALAR_C1, few_output_pixels_with_step) {
  using Variant = ArgMaxPoolMicrokernelTester::Variant;
  // Sweeps 2..5 output pixels, 2..9 pooling elements, channel counts 1..5, and every
  // step from 2 up to the current pooling element count; output stride is 7.
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (size_t window = 2; window <= 9; ++window) {
      for (size_t num_channels = 1; num_channels <= 5; ++num_channels) {
        for (size_t window_step = 2; window_step <= window; ++window_step) {
          ArgMaxPoolMicrokernelTester().output_pixels(num_pixels).pooling_elements(window)
              .pooling_tile(9)
              .step(window_step)
              .channels(num_channels)
              .output_stride(7)
              .Test(xnn_f32_argmaxpool_ukernel_9x__scalar_c1, Variant::Scalar);
        }
      }
    }
  }
}
2027 
2028 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
2029 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_twopass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  // Single configuration: 17 pooling elements, pooling tile (9, 8), 4 channels.
  ArgMaxPoolMicrokernelTester().pooling_elements(17).pooling_tile(9, 8).channels(4)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
}
2038 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Single configuration: 17 pooling elements, pooling tile (9, 8), 4 channels,
  // input offset 7.
  ArgMaxPoolMicrokernelTester().pooling_elements(17).pooling_tile(9, 8).channels(4)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
}
2048 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Single configuration: 17 pooling elements, pooling tile (9, 8), 4 channels,
  // qmin set to 192.
  ArgMaxPoolMicrokernelTester().pooling_elements(17).pooling_tile(9, 8).channels(4)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
}
2058 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Single configuration: 17 pooling elements, pooling tile (9, 8), 4 channels,
  // qmax set to 192.
  ArgMaxPoolMicrokernelTester().pooling_elements(17).pooling_tile(9, 8).channels(4)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
}
2068 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_twopass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps 10..16 pooling elements with pooling tile (9, 8) and 4 channels.
  for (size_t window = 10; window < 17; ++window) {
    ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9, 8).channels(4)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2079 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps 10..16 pooling elements with pooling tile (9, 8), 4 channels, and input offset 7.
  for (size_t window = 10; window < 17; ++window) {
    ArgMaxPoolMicrokernelTester().pooling_elements(window).pooling_tile(9, 8).channels(4)
        .input_offset(7)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2091 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_twopass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps channel counts 8, 12, ..., 28 with 17 pooling elements and pooling tile (9, 8).
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester().pooling_elements(17).pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2102 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps channel counts 8, 12, ..., 28 with 17 pooling elements, pooling tile (9, 8),
  // and input offset 23.
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester().pooling_elements(17).pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(23)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2114 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps channel counts 8, 12, ..., 28 with 17 pooling elements, pooling tile (9, 8),
  // and qmin set to 192.
  for (size_t num_channels = 8; num_channels < 32; num_channels += 4) {
    ArgMaxPoolMicrokernelTester().pooling_elements(17).pooling_tile(9, 8)
        .channels(num_channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2126 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // 17 pooling elements across channel counts 8, 12, ..., 28, with the
  // tester's qmax set to 192.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 8; num_channels <= 28; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2138 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_twopass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 10..16 (above 9, below 9 + 8) for channel counts
  // 8, 12, ..., 28. The swept size, not a fixed 17, is handed to the tester
  // so that every size in the range is actually run.
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2151 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 10..16 for channel counts 8, 12, ..., 28 with an
  // input offset of 37. The swept size, not a fixed 17, is handed to the
  // tester so that every size in the range is actually run.
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2165 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_twopass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  // 17 pooling elements (9 + 8) with 1, 2 and 3 channels.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2176 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // 17 pooling elements with 1, 2 and 3 channels and an input offset of 5.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2188 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // 17 pooling elements with 1, 2 and 3 channels, qmin set to 192.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2200 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // 17 pooling elements with 1, 2 and 3 channels, qmax set to 192.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2212 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_twopass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 10..16 for 1, 2 and 3 channels. The swept size, not
  // a fixed 17, is handed to the tester so that every size is actually run.
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2225 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 10..16 for 1, 2 and 3 channels with an input offset
  // of 5. The swept size, not a fixed 17, is handed to the tester so that
  // every size is actually run.
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2239 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_twopass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  // 17 pooling elements (9 + 8) with 5, 6 and 7 channels.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 5; num_channels <= 7; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2250 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // 17 pooling elements with 5, 6 and 7 channels and an input offset of 11.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 5; num_channels <= 7; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2262 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // 17 pooling elements with 5, 6 and 7 channels, qmin set to 192.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 5; num_channels <= 7; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2274 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // 17 pooling elements with 5, 6 and 7 channels, qmax set to 192.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 5; num_channels <= 7; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2286 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_twopass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 10..16 for 5, 6 and 7 channels. The swept size, not
  // a fixed 17, is handed to the tester so that every size is actually run.
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2299 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 10..16 for 5, 6 and 7 channels with an input offset
  // of 11. The swept size, not a fixed 17, is handed to the tester so that
  // every size is actually run.
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2313 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_multipass) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 (all above 9 + 8) with exactly 4
  // channels. The loop variable, not a fixed 17, is passed to the tester so
  // that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(4)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2324 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_multipass_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 with exactly 4 channels and an input
  // offset of 7. The loop variable, not a fixed 17, is passed to the tester
  // so that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(4)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2336 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_multipass_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 with exactly 4 channels and qmin set
  // to 192. The loop variable, not a fixed 17, is passed to the tester so
  // that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(4)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2348 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_eq_4_multipass_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 with exactly 4 channels and qmax set
  // to 192. The loop variable, not a fixed 17, is passed to the tester so
  // that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(4)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
  }
}
2360 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_multipass) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for channel counts 8, 12, ..., 28.
  // The loop variable, not a fixed 17, is passed to the tester so that each
  // swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2373 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_multipass_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for channel counts 8, 12, ..., 28
  // with an input offset of 37. The loop variable, not a fixed 17, is passed
  // to the tester so that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2387 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_multipass_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for channel counts 8, 12, ..., 28
  // with qmin set to 192. The loop variable, not a fixed 17, is passed to
  // the tester so that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2401 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_div_4_multipass_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for channel counts 8, 12, ..., 28
  // with qmax set to 192. The loop variable, not a fixed 17, is passed to
  // the tester so that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2415 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_multipass) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for 1, 2 and 3 channels. The loop
  // variable, not a fixed 17, is passed to the tester so that each swept
  // size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2428 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_multipass_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for 1, 2 and 3 channels with an
  // input offset of 4. The loop variable, not a fixed 17, is passed to the
  // tester so that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(4)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2442 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_multipass_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for 1, 2 and 3 channels with qmin
  // set to 192. The loop variable, not a fixed 17, is passed to the tester
  // so that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2456 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_lt_4_multipass_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for 1, 2 and 3 channels with qmax
  // set to 192. The loop variable, not a fixed 17, is passed to the tester
  // so that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2470 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_multipass) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for 5, 6 and 7 channels. The loop
  // variable, not a fixed 17, is passed to the tester so that each swept
  // size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2483 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_multipass_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for 5, 6 and 7 channels with an
  // input offset of 11. The loop variable, not a fixed 17, is passed to the
  // tester so that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2497 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_multipass_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for 5, 6 and 7 channels with qmin
  // set to 192. The loop variable, not a fixed 17, is passed to the tester
  // so that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2511 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, channels_gt_4_multipass_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep pooling sizes 18, 21, ..., 33 for 5, 6 and 7 channels with qmax
  // set to 192. The loop variable, not a fixed 17, is passed to the tester
  // so that each swept size is actually tested.
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
    }
  }
}
2525 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, few_output_pixels) {
  TEST_REQUIRES_X86_SSE2;
  // Cross product of 2..5 output pixels, pooling sizes 10..17 and channel
  // counts 1, 4, ..., 19.
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t kernel_elements = 10; kernel_elements < 18; ++kernel_elements) {
      for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(kernel_elements)
            .pooling_tile(9, 8)
            .channels(num_channels)
            .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
      }
    }
  }
}
2541 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Cross product of 2..5 output pixels, pooling sizes 10..17 and channel
  // counts 1, 4, ..., 19, each with an input offset of 23.
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t kernel_elements = 10; kernel_elements < 18; ++kernel_elements) {
      for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(kernel_elements)
            .pooling_tile(9, 8)
            .channels(num_channels)
            .input_offset(23)
            .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
      }
    }
  }
}
2558 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, few_output_pixels_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Cross product of 2..5 output pixels, pooling sizes 10..17 and channel
  // counts 1, 4, ..., 19, each with qmin set to 192.
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t kernel_elements = 10; kernel_elements < 18; ++kernel_elements) {
      for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(kernel_elements)
            .pooling_tile(9, 8)
            .channels(num_channels)
            .qmin(192)
            .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
      }
    }
  }
}
2575 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, few_output_pixels_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Cross product of 2..5 output pixels, pooling sizes 10..17 and channel
  // counts 1, 4, ..., 19, each with qmax set to 192.
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t kernel_elements = 10; kernel_elements < 18; ++kernel_elements) {
      for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(kernel_elements)
            .pooling_tile(9, 8)
            .channels(num_channels)
            .qmax(192)
            .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
      }
    }
  }
}
2592 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  // Cross product of 2..5 output pixels, pooling sizes 10..17 and channel
  // counts 1, 4, ..., 19, each with an output stride of 23.
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t kernel_elements = 10; kernel_elements < 18; ++kernel_elements) {
      for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
        ArgMaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(kernel_elements)
            .pooling_tile(9, 8)
            .channels(num_channels)
            .output_stride(23)
            .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
      }
    }
  }
}
2609 
TEST(F32_ARGMAXPOOL_9P8X__SSE2_C4, few_output_pixels_with_step) {
  TEST_REQUIRES_X86_SSE2;
  // Cross product of 2..5 output pixels, pooling sizes 10..17, channel
  // counts 1, 4, ..., 19 and every step from 2 up to the pooling size,
  // each with an output stride of 23.
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t kernel_elements = 10; kernel_elements < 18; ++kernel_elements) {
      for (size_t num_channels = 1; num_channels < 21; num_channels += 3) {
        for (size_t pixel_step = 2; pixel_step <= kernel_elements; ++pixel_step) {
          ArgMaxPoolMicrokernelTester()
              .output_pixels(num_pixels)
              .pooling_elements(kernel_elements)
              .pooling_tile(9, 8)
              .step(pixel_step)
              .channels(num_channels)
              .output_stride(23)
              .Test(xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4);
        }
      }
    }
  }
}
2629 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2630 
2631 
2632 #if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
2633 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_eq_4_twopass_fulltile) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements (9 + 8, the two pooling_tile arguments) over exactly
  // 4 channels, run through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  const size_t num_channels = 4;
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(kernel_elements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
            ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
2642 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_eq_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements over exactly 4 channels with an input offset of 7,
  // run through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  const size_t num_channels = 4;
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(kernel_elements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
            ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
2652 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_eq_4_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements over exactly 4 channels with qmin set to 192, run
  // through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  const size_t num_channels = 4;
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(kernel_elements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
            ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
2662 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_eq_4_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements over exactly 4 channels with qmax set to 192, run
  // through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  const size_t num_channels = 4;
  ArgMaxPoolMicrokernelTester()
      .pooling_elements(kernel_elements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
            ArgMaxPoolMicrokernelTester::Variant::Scalar);
}
2672 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_eq_4_twopass_subtile) {
  TEST_REQUIRES_PSIMD;
  // Pooling sizes 10..16 with exactly 4 channels, run through the tester's
  // Scalar variant.
  const size_t num_channels = 4;
  for (size_t kernel_elements = 10; kernel_elements <= 16; ++kernel_elements) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2683 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_eq_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  // Pooling sizes 10..16 with exactly 4 channels and an input offset of 7,
  // run through the tester's Scalar variant.
  const size_t num_channels = 4;
  for (size_t kernel_elements = 10; kernel_elements <= 16; ++kernel_elements) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(7)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2695 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_div_4_twopass_fulltile) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements (9 + 8) across channel counts 8, 12, ..., 28, run
  // through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 8; num_channels <= 28; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2706 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_div_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements across channel counts 8, 12, ..., 28 with an input
  // offset of 23, run through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 8; num_channels <= 28; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(23)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2718 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_div_4_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements across channel counts 8, 12, ..., 28 with qmin set
  // to 192, run through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 8; num_channels <= 28; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2730 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_div_4_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements across channel counts 8, 12, ..., 28 with qmax set
  // to 192, run through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 8; num_channels <= 28; num_channels += 4) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2742 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_div_4_twopass_subtile) {
  TEST_REQUIRES_PSIMD;
  // Sweep pooling sizes 10..16 (above 9, below 9 + 8) for channel counts
  // 8, 12, ..., 28. The swept size, not a fixed 17, is handed to the tester
  // so that every size in the range is actually run.
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
2755 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_div_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  // Sweep pooling sizes 10..16 for channel counts 8, 12, ..., 28 with an
  // input offset of 37. The swept size, not a fixed 17, is handed to the
  // tester so that every size in the range is actually run.
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
2769 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_lt_4_twopass_fulltile) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements (9 + 8) with 1, 2 and 3 channels, run through the
  // tester's Scalar variant.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2780 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_lt_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements with 1, 2 and 3 channels and an input offset of 5,
  // run through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2792 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_lt_4_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements with 1, 2 and 3 channels and qmin set to 192, run
  // through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2804 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_lt_4_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  // 17 pooling elements with 1, 2 and 3 channels and qmax set to 192, run
  // through the tester's Scalar variant.
  const size_t kernel_elements = 17;
  for (size_t num_channels = 1; num_channels <= 3; ++num_channels) {
    ArgMaxPoolMicrokernelTester()
        .pooling_elements(kernel_elements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4,
              ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2816 
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_lt_4_twopass_subtile) {
  TEST_REQUIRES_PSIMD;
  // Sweep pooling sizes 10..16 for 1, 2 and 3 channels. The swept size, not
  // a fixed 17, is handed to the tester so that every size is actually run.
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
2829 
// Two-pass sub-tile coverage for 1..3 channels with an input offset of 5.
// Each pooling_elements value in 10..16 is passed to the tester; the previous
// hard-coded 17 ignored the loop variable and repeated the full-tile case.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_lt_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(5)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
2843 
// Two-pass full tile (17 pooling elements, 9/8 tile) for 5..7 channels.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_gt_4_twopass_fulltile) {
  TEST_REQUIRES_PSIMD;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t c = 5; c <= 7; ++c) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(c)
      .Test(ukernel, variant);
  }
}
2854 
// Two-pass full tile (17 pooling elements, 9/8 tile) for 5..7 channels with
// an input offset of 11.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_gt_4_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t c = 5; c <= 7; ++c) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(c).input_offset(11)
      .Test(ukernel, variant);
  }
}
2866 
// Two-pass full tile (17 pooling elements, 9/8 tile) for 5..7 channels with
// qmin set to 192.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_gt_4_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_PSIMD;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t c = 5; c <= 7; ++c) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(c).qmin(192)
      .Test(ukernel, variant);
  }
}
2878 
// Two-pass full tile (17 pooling elements, 9/8 tile) for 5..7 channels with
// qmax set to 192.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_gt_4_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_PSIMD;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t c = 5; c <= 7; ++c) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(c).qmax(192)
      .Test(ukernel, variant);
  }
}
2890 
// Two-pass sub-tile coverage for 5..7 channels. Each pooling_elements value
// in 10..16 is passed to the tester; the previous hard-coded 17 ignored the
// loop variable and merely repeated the full-tile configuration.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_gt_4_twopass_subtile) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
2903 
// Two-pass sub-tile coverage for 5..7 channels with an input offset of 11.
// Each pooling_elements value in 10..16 is passed to the tester; the previous
// hard-coded 17 ignored the loop variable and repeated the full-tile case.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_gt_4_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
2917 
// Multipass coverage for 4 channels. Each pooling_elements value in
// {18, 21, ..., 33} is passed to the tester; the previous hard-coded 17
// ignored the loop variable, so the same 17-element case ran six times.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_eq_4_multipass) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(4)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2928 
// Multipass coverage for 4 channels with an input offset of 7. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_eq_4_multipass_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(4)
      .input_offset(7)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2940 
// Multipass coverage for 4 channels with qmin set to 192. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_eq_4_multipass_with_qmin) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(4)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2952 
// Multipass coverage for 4 channels with qmax set to 192. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_eq_4_multipass_with_qmax) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(4)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
2964 
// Multipass coverage for channel counts 8, 12, ..., 28. Each pooling_elements
// value in {18, 21, ..., 33} is passed to the tester; the previous hard-coded
// 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_div_4_multipass) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
2977 
// Multipass coverage for channel counts 8, 12, ..., 28 with an input offset
// of 37. Each pooling_elements value in {18, 21, ..., 33} is passed to the
// tester; the previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_div_4_multipass_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(37)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
2991 
// Multipass coverage for channel counts 8, 12, ..., 28 with qmin set to 192.
// Each pooling_elements value in {18, 21, ..., 33} is passed to the tester;
// the previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_div_4_multipass_with_qmin) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3005 
// Multipass coverage for channel counts 8, 12, ..., 28 with qmax set to 192.
// Each pooling_elements value in {18, 21, ..., 33} is passed to the tester;
// the previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_div_4_multipass_with_qmax) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 8; channels < 32; channels += 4) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3019 
// Multipass coverage for 1..3 channels. Each pooling_elements value in
// {18, 21, ..., 33} is passed to the tester; the previous hard-coded 17
// ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_lt_4_multipass) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3032 
// Multipass coverage for 1..3 channels with an input offset of 4. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_lt_4_multipass_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(4)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3046 
// Multipass coverage for 1..3 channels with qmin set to 192. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_lt_4_multipass_with_qmin) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3060 
// Multipass coverage for 1..3 channels with qmax set to 192. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_lt_4_multipass_with_qmax) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 4; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3074 
// Multipass coverage for 5..7 channels. Each pooling_elements value in
// {18, 21, ..., 33} is passed to the tester; the previous hard-coded 17
// ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_gt_4_multipass) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3087 
// Multipass coverage for 5..7 channels with an input offset of 11. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_gt_4_multipass_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3101 
// Multipass coverage for 5..7 channels with qmin set to 192. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_gt_4_multipass_with_qmin) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3115 
// Multipass coverage for 5..7 channels with qmax set to 192. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, channels_gt_4_multipass_with_qmax) {
  TEST_REQUIRES_PSIMD;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 5; channels < 8; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3129 
// Sweeps 2..5 output pixels, 10..17 pooling elements and channel counts
// 1, 4, ..., 19 with the 9/8 pooling tile.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, few_output_pixels) {
  TEST_REQUIRES_PSIMD;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t elements = 10; elements < 18; ++elements) {
      for (size_t c = 1; c < 21; c += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements).pooling_tile(9, 8)
          .channels(c)
          .Test(ukernel, variant);
      }
    }
  }
}
3145 
// Sweeps 2..5 output pixels, 10..17 pooling elements and channel counts
// 1, 4, ..., 19 with the 9/8 pooling tile and an input offset of 23.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_PSIMD;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t elements = 10; elements < 18; ++elements) {
      for (size_t c = 1; c < 21; c += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements).pooling_tile(9, 8)
          .channels(c).input_offset(23)
          .Test(ukernel, variant);
      }
    }
  }
}
3162 
// Sweeps 2..5 output pixels, 10..17 pooling elements and channel counts
// 1, 4, ..., 19 with the 9/8 pooling tile and qmin set to 192.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, few_output_pixels_with_qmin) {
  TEST_REQUIRES_PSIMD;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t elements = 10; elements < 18; ++elements) {
      for (size_t c = 1; c < 21; c += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements).pooling_tile(9, 8)
          .channels(c).qmin(192)
          .Test(ukernel, variant);
      }
    }
  }
}
3179 
// Sweeps 2..5 output pixels, 10..17 pooling elements and channel counts
// 1, 4, ..., 19 with the 9/8 pooling tile and qmax set to 192.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, few_output_pixels_with_qmax) {
  TEST_REQUIRES_PSIMD;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t elements = 10; elements < 18; ++elements) {
      for (size_t c = 1; c < 21; c += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements).pooling_tile(9, 8)
          .channels(c).qmax(192)
          .Test(ukernel, variant);
      }
    }
  }
}
3196 
// Sweeps 2..5 output pixels, 10..17 pooling elements and channel counts
// 1, 4, ..., 19 with the 9/8 pooling tile and an output stride of 23.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_PSIMD;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t elements = 10; elements < 18; ++elements) {
      for (size_t c = 1; c < 21; c += 3) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements).pooling_tile(9, 8)
          .channels(c).output_stride(23)
          .Test(ukernel, variant);
      }
    }
  }
}
3213 
// Sweeps 2..5 output pixels, 10..17 pooling elements, channel counts
// 1, 4, ..., 19 and every step from 2 up to the current pooling element
// count, with the 9/8 pooling tile and an output stride of 23.
TEST(F32_ARGMAXPOOL_9P8X__PSIMD_C4, few_output_pixels_with_step) {
  TEST_REQUIRES_PSIMD;
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__psimd_c4;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t elements = 10; elements < 18; ++elements) {
      for (size_t c = 1; c < 21; c += 3) {
        for (size_t s = 2; s <= elements; ++s) {
          ArgMaxPoolMicrokernelTester()
            .output_pixels(pixels)
            .pooling_elements(elements).pooling_tile(9, 8)
            .step(s)
            .channels(c).output_stride(23)
            .Test(ukernel, variant);
        }
      }
    }
  }
}
3233 #endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
3234 
3235 
3236 
// Two-pass full tile (17 pooling elements, 9/8 tile) with a single channel.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_twopass_fulltile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(17).pooling_tile(9, 8)
    .channels(1)
    .Test(ukernel, variant);
}
3244 
// Two-pass full tile (17 pooling elements, 9/8 tile) with a single channel
// and an input offset of 3.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_twopass_fulltile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(17).pooling_tile(9, 8)
    .channels(1).input_offset(3)
    .Test(ukernel, variant);
}
3253 
// Two-pass full tile (17 pooling elements, 9/8 tile) with a single channel
// and qmin set to 192.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_twopass_fulltile_with_qmin) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(17).pooling_tile(9, 8)
    .channels(1).qmin(192)
    .Test(ukernel, variant);
}
3262 
// Two-pass full tile (17 pooling elements, 9/8 tile) with a single channel
// and qmax set to 192.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_twopass_fulltile_with_qmax) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  ArgMaxPoolMicrokernelTester()
    .pooling_elements(17).pooling_tile(9, 8)
    .channels(1).qmax(192)
    .Test(ukernel, variant);
}
3271 
// Two-pass sub-tile sweep: 10..16 pooling elements with a single channel.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_twopass_subtile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t elements = 10; elements <= 16; ++elements) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(elements).pooling_tile(9, 8)
      .channels(1)
      .Test(ukernel, variant);
  }
}
3281 
// Two-pass sub-tile sweep: 10..16 pooling elements with a single channel and
// an input offset of 3.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_twopass_subtile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t elements = 10; elements <= 16; ++elements) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(elements).pooling_tile(9, 8)
      .channels(1).input_offset(3)
      .Test(ukernel, variant);
  }
}
3292 
// Two-pass full tile (17 pooling elements, 9/8 tile) for 2..9 channels.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_twopass_fulltile) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t c = 2; c <= 9; ++c) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(c)
      .Test(ukernel, variant);
  }
}
3302 
// Two-pass full tile (17 pooling elements, 9/8 tile) for 2..9 channels with
// an input offset of 3.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_twopass_fulltile_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t c = 2; c <= 9; ++c) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(c).input_offset(3)
      .Test(ukernel, variant);
  }
}
3313 
// Two-pass full tile (17 pooling elements, 9/8 tile) for 2..9 channels with
// qmin set to 192.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_twopass_fulltile_with_qmin) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t c = 2; c <= 9; ++c) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(c).qmin(192)
      .Test(ukernel, variant);
  }
}
3324 
// Two-pass full tile (17 pooling elements, 9/8 tile) for 2..9 channels with
// qmax set to 192.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_twopass_fulltile_with_qmax) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t c = 2; c <= 9; ++c) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(c).qmax(192)
      .Test(ukernel, variant);
  }
}
3335 
// Two-pass sub-tile coverage for 2..9 channels. Each pooling_elements value
// in 10..16 is passed to the tester; the previous hard-coded 17 ignored the
// loop variable and merely repeated the full-tile configuration.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_twopass_subtile) {
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 2; channels < 10; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3347 
// Two-pass sub-tile coverage for 2..9 channels with an input offset of 3.
// Each pooling_elements value in 10..16 is passed to the tester; the previous
// hard-coded 17 ignored the loop variable and repeated the full-tile case.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_twopass_subtile_with_input_offset) {
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 2; channels < 10; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(3)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3360 
// Multipass coverage for a single channel. Each pooling_elements value in
// {18, 21, ..., 33} is passed to the tester; the previous hard-coded 17
// ignored the loop variable, so the same 17-element case ran six times.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_multipass) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(1)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
3370 
// Multipass coverage for a single channel with an input offset of 3. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the loop variable.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_multipass_with_input_offset) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(1)
      .input_offset(3)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
3381 
// Multipass coverage for a single channel with qmin set to 192. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the loop variable.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_multipass_with_qmin) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(1)
      .qmin(192)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
3392 
// Multipass coverage for a single channel with qmax set to 192. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the loop variable.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_eq_1_multipass_with_qmax) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    ArgMaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(1)
      .qmax(192)
      .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
  }
}
3403 
// Multipass coverage for 2..9 channels. Each pooling_elements value in
// {18, 21, ..., 33} is passed to the tester; the previous hard-coded 17
// ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_multipass) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 2; channels < 10; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3415 
// Multipass coverage for 2..9 channels with an input offset of 3. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_multipass_with_input_offset) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 2; channels < 10; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(3)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3428 
// Multipass coverage for 2..9 channels with qmin set to 192. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_multipass_with_qmin) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 2; channels < 10; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3441 
// Multipass coverage for 2..9 channels with qmax set to 192. Each
// pooling_elements value in {18, 21, ..., 33} is passed to the tester; the
// previous hard-coded 17 ignored the outer loop variable.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, channels_gt_1_multipass_with_qmax) {
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 2; channels < 10; channels++) {
      ArgMaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(192)
        .Test(xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1, ArgMaxPoolMicrokernelTester::Variant::Scalar);
    }
  }
}
3454 
// Sweeps 2..5 output pixels, 10..17 pooling elements and 1..5 channels with
// the 9/8 pooling tile.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, few_output_pixels) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t elements = 10; elements < 18; ++elements) {
      for (size_t c = 1; c < 6; ++c) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements).pooling_tile(9, 8)
          .channels(c)
          .Test(ukernel, variant);
      }
    }
  }
}
3469 
// Sweeps 2..5 output pixels, 10..17 pooling elements and 1..5 channels with
// the 9/8 pooling tile and an input offset of 7.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, few_output_pixels_with_input_offset) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t elements = 10; elements < 18; ++elements) {
      for (size_t c = 1; c < 6; ++c) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements).pooling_tile(9, 8)
          .channels(c).input_offset(7)
          .Test(ukernel, variant);
      }
    }
  }
}
3485 
// Sweeps 2..5 output pixels, 10..17 pooling elements and 1..5 channels with
// the 9/8 pooling tile and qmin set to 192.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, few_output_pixels_with_qmin) {
  const auto ukernel = xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1;
  const auto variant = ArgMaxPoolMicrokernelTester::Variant::Scalar;
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t elements = 10; elements < 18; ++elements) {
      for (size_t c = 1; c < 6; ++c) {
        ArgMaxPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(elements).pooling_tile(9, 8)
          .channels(c).qmin(192)
          .Test(ukernel, variant);
      }
    }
  }
}
3501 
// Sweeps output_pixels over [2, 5], pooling_elements over [10, 17] and
// channels over [1, 5] with a (9, 8) pooling tile, with the tester's qmax
// set to 192.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, few_output_pixels_with_qmax) {
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 6; ++c) {
        // Configure a fresh tester per combination, in the same setter order.
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixels);
        tester.pooling_elements(kernel_size);
        tester.pooling_tile(9, 8);
        tester.channels(c);
        tester.qmax(192);
        tester.Test(
          xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1,
          ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
3517 
// Sweeps output_pixels over [2, 5], pooling_elements over [10, 17] and
// channels over [1, 5] with a (9, 8) pooling tile, with the tester's
// output_stride set to 7 (larger than every channel count in the sweep).
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, few_output_pixels_with_output_stride) {
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 6; ++c) {
        // Configure a fresh tester per combination, in the same setter order.
        auto tester = ArgMaxPoolMicrokernelTester();
        tester.output_pixels(pixels);
        tester.pooling_elements(kernel_size);
        tester.pooling_tile(9, 8);
        tester.channels(c);
        tester.output_stride(7);
        tester.Test(
          xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1,
          ArgMaxPoolMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
3533 
// Sweeps output_pixels over [2, 5], pooling_elements over [10, 17], channels
// over [1, 5] and step over [2, pooling_elements], with a (9, 8) pooling tile
// and the tester's output_stride set to 7.
TEST(F32_ARGMAXPOOL_9P8X__SCALAR_C1, few_output_pixels_with_step) {
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (size_t kernel_size = 10; kernel_size < 18; ++kernel_size) {
      for (size_t c = 1; c < 6; ++c) {
        // The step upper bound tracks the current pooling-element count.
        for (size_t step_size = 2; step_size < kernel_size + 1; ++step_size) {
          // Configure a fresh tester per combination, in the same setter order.
          auto tester = ArgMaxPoolMicrokernelTester();
          tester.output_pixels(pixels);
          tester.pooling_elements(kernel_size);
          tester.pooling_tile(9, 8);
          tester.step(step_size);
          tester.channels(c);
          tester.output_stride(7);
          tester.Test(
            xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1,
            ArgMaxPoolMicrokernelTester::Variant::Scalar);
        }
      }
    }
  }
}