• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8 
9 #include <gtest/gtest.h>
10 
11 #include <xnnpack/common.h>
12 #include <xnnpack/isa-checks.h>
13 
14 #include <xnnpack/avgpool.h>
15 #include "avgpool-microkernel-tester.h"
16 
17 
18 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs xnn_q8_avgpool_ukernel_up9__neon with kc = 8 for every (kh, kw) pair
// whose product equals mr().
TEST(Q8_AVGPOOL_UP9__NEON, kc_eq_8_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).kc(8);
  const size_t full = tester.mr();
  for (size_t h = 1; h <= full; ++h) {
    for (size_t w = 1; w <= full; ++w) {
      // Only factor pairs of mr() are exercised.
      if (h * w != full) {
        continue;
      }
      tester.kh(h).kw(w).Test(xnn_q8_avgpool_ukernel_up9__neon);
    }
  }
}
35 
// Runs xnn_q8_avgpool_ukernel_up9__neon with kc = 8 for every (kh, kw) pair
// whose product is in [2, mr()).
TEST(Q8_AVGPOOL_UP9__NEON, kc_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).kc(8);
  const size_t full = tester.mr();
  for (size_t size = 2; size < full; ++size) {
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        // Only factor pairs of the current size are exercised.
        if (h * w != size) {
          continue;
        }
        tester.kh(h).kw(w).Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
54 
// Runs xnn_q8_avgpool_ukernel_up9__neon for kc = 8, 32, 56, 80, 104 and every
// (kh, kw) pair whose product equals mr().
TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9);
  const size_t full = tester.mr();
  for (size_t h = 1; h <= full; ++h) {
    for (size_t w = 1; w <= full; ++w) {
      if (h * w != full) {
        continue;
      }
      for (size_t c = 8; c < 128; c += 24) {
        tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
73 
// Runs xnn_q8_avgpool_ukernel_up9__neon for kc = 8, 32, 56, 80, 104 and every
// (kh, kw) pair whose product is in [2, mr()); 3 iterations per configuration.
TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).iterations(3);
  const size_t full = tester.mr();
  for (size_t size = 2; size < full; ++size) {
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        if (h * w != size) {
          continue;
        }
        for (size_t c = 8; c < 128; c += 24) {
          tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_up9__neon);
        }
      }
    }
  }
}
95 
// Same sweep as the full-tile kc = 8, 32, ..., 104 case, but with x_stride
// set to 131 on every run.
TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_fulltile_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).iterations(3);
  const size_t full = tester.mr();
  for (size_t h = 1; h <= full; ++h) {
    for (size_t w = 1; w <= full; ++w) {
      if (h * w != full) {
        continue;
      }
      for (size_t c = 8; c < 128; c += 24) {
        tester.kh(h).kw(w).kc(c).x_stride(131)
          .Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
116 
// Runs xnn_q8_avgpool_ukernel_up9__neon for kc = 1..7 and every (kh, kw) pair
// whose product equals mr().
TEST(Q8_AVGPOOL_UP9__NEON, kc_lt_8_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9);
  const size_t full = tester.mr();
  for (size_t h = 1; h <= full; ++h) {
    for (size_t w = 1; w <= full; ++w) {
      if (h * w != full) {
        continue;
      }
      for (size_t c = 1; c < 8; ++c) {
        tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
135 
// Runs xnn_q8_avgpool_ukernel_up9__neon for kc = 1..7 and every (kh, kw) pair
// whose product is in [2, mr()); 3 iterations per configuration.
TEST(Q8_AVGPOOL_UP9__NEON, kc_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).iterations(3);
  const size_t full = tester.mr();
  for (size_t size = 2; size < full; ++size) {
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        if (h * w != size) {
          continue;
        }
        for (size_t c = 1; c < 8; ++c) {
          tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_up9__neon);
        }
      }
    }
  }
}
157 
// Full-tile sweep over kc = 1..7 with x_stride set to 23 on every run.
TEST(Q8_AVGPOOL_UP9__NEON, kc_lt_8_fulltile_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).iterations(3);
  const size_t full = tester.mr();
  for (size_t h = 1; h <= full; ++h) {
    for (size_t w = 1; w <= full; ++w) {
      if (h * w != full) {
        continue;
      }
      for (size_t c = 1; c < 8; ++c) {
        tester.kh(h).kw(w).kc(c).x_stride(23)
          .Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
178 
// Runs xnn_q8_avgpool_ukernel_up9__neon for kc = 9..15 and every (kh, kw)
// pair whose product equals mr().
TEST(Q8_AVGPOOL_UP9__NEON, kc_gt_8_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9);
  const size_t full = tester.mr();
  for (size_t h = 1; h <= full; ++h) {
    for (size_t w = 1; w <= full; ++w) {
      if (h * w != full) {
        continue;
      }
      for (size_t c = 9; c < 16; ++c) {
        tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
197 
// Runs xnn_q8_avgpool_ukernel_up9__neon for kc = 9..15 and every (kh, kw)
// pair whose product is in [2, mr()); 3 iterations per configuration.
TEST(Q8_AVGPOOL_UP9__NEON, kc_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).iterations(3);
  const size_t full = tester.mr();
  for (size_t size = 2; size < full; ++size) {
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        if (h * w != size) {
          continue;
        }
        for (size_t c = 9; c < 16; ++c) {
          tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_up9__neon);
        }
      }
    }
  }
}
219 
// Full-tile sweep over kc = 9..15 with x_stride set to 23 on every run.
TEST(Q8_AVGPOOL_UP9__NEON, kc_gt_8_fulltile_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).iterations(3);
  const size_t full = tester.mr();
  for (size_t h = 1; h <= full; ++h) {
    for (size_t w = 1; w <= full; ++w) {
      if (h * w != full) {
        continue;
      }
      for (size_t c = 9; c < 16; ++c) {
        tester.kh(h).kw(w).kc(c).x_stride(23)
          .Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
240 
// Sweeps x_scale geometrically from 0.01 (multiplying by 3.14159265 while
// below 100) on a 3x3 configuration, for n = 1, 3, 5 and kc = 8, 32, ..., 104.
TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_x_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 1; count <= 5; count += 2) {
    for (size_t c = 8; c < 128; c += 24) {
      for (float scale = 0.01f; scale < 100.0f; scale *= 3.14159265f) {
        AvgPoolMicrokernelTester()
          .mr(9).n(count)
          .kh(3).kw(3).kc(c)
          .x_scale(scale)
          .iterations(2)
          .Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
259 
// Sweeps x_zero_point over 0, 51, ..., 255 on a 3x3 configuration, for
// n = 1, 3, 5 and kc = 8, 32, ..., 104.
TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_x_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 1; count <= 5; count += 2) {
    for (size_t c = 8; c < 128; c += 24) {
      // int32_t counter so that the `<= 255` bound terminates the loop.
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9).n(count)
          .kh(3).kw(3).kc(c)
          .x_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(3)
          .Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
278 
// Sweeps y_scale geometrically from 0.01 (multiplying by 3.14159265 while
// below 100) on a 3x3 configuration, for n = 1, 3, 5 and kc = 8, 32, ..., 104.
TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 1; count <= 5; count += 2) {
    for (size_t c = 8; c < 128; c += 24) {
      for (float scale = 0.01f; scale < 100.0f; scale *= 3.14159265f) {
        AvgPoolMicrokernelTester()
          .mr(9).n(count)
          .kh(3).kw(3).kc(c)
          .y_scale(scale)
          .iterations(2)
          .Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
297 
// Sweeps y_zero_point over 0, 51, ..., 255 on a 3x3 configuration, for
// n = 1, 3, 5 and kc = 8, 32, ..., 104.
TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 1; count <= 5; count += 2) {
    for (size_t c = 8; c < 128; c += 24) {
      // int32_t counter so that the `<= 255` bound terminates the loop.
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9).n(count)
          .kh(3).kw(3).kc(c)
          .y_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(3)
          .Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
316 
// Runs a 3x3 configuration with qmax = 128, both zero points at 128 and both
// scales at 1.0, for n = 1, 3, 5 and kc = 8, 32, ..., 104.
TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 1; count <= 5; count += 2) {
    for (size_t c = 8; c < 128; c += 24) {
      AvgPoolMicrokernelTester()
        .mr(9).n(count)
        .kh(3).kw(3).kc(c)
        .x_zero_point(128).y_zero_point(128)
        .x_scale(1.0f).y_scale(1.0f)
        .qmax(128)
        .Test(xnn_q8_avgpool_ukernel_up9__neon);
    }
  }
}
336 
// Runs a 3x3 configuration with qmin = 128, both zero points at 128 and both
// scales at 1.0, for n = 1, 3, 5 and kc = 8, 32, ..., 104.
TEST(Q8_AVGPOOL_UP9__NEON, kc_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 1; count <= 5; count += 2) {
    for (size_t c = 8; c < 128; c += 24) {
      AvgPoolMicrokernelTester()
        .mr(9).n(count)
        .kh(3).kw(3).kc(c)
        .x_zero_point(128).y_zero_point(128)
        .x_scale(1.0f).y_scale(1.0f)
        .qmin(128)
        .Test(xnn_q8_avgpool_ukernel_up9__neon);
    }
  }
}
356 
// Runs square kh = kw configurations of size 2 and 3 for n = 2, 3, 4 and
// kc = 8, 13, 18, 23.
TEST(Q8_AVGPOOL_UP9__NEON, small_n) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 2; count < 5; ++count) {
    for (const size_t side : std::vector<size_t>{2, 3}) {
      for (size_t c = 8; c < 25; c += 5) {
        AvgPoolMicrokernelTester()
          .mr(9).n(count)
          .kh(side).kw(side).kc(c)
          .Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
373 
// Runs square kh = kw configurations of size 2 and 3 for n = 2, 3, 4 and
// kc = 8, 13, 18, 23, with x_stride set to 29.
TEST(Q8_AVGPOOL_UP9__NEON, small_n_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 2; count < 5; ++count) {
    for (const size_t side : std::vector<size_t>{2, 3}) {
      for (size_t c = 8; c < 25; c += 5) {
        AvgPoolMicrokernelTester()
          .mr(9).n(count)
          .kh(side).kw(side).kc(c)
          .x_stride(29)
          .Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
391 
// Runs square kh = kw configurations of size 2 and 3 for n = 2, 3, 4 and
// kc = 8, 13, 18, 23, with y_stride set to 31.
TEST(Q8_AVGPOOL_UP9__NEON, small_n_with_y_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 2; count < 5; ++count) {
    for (const size_t side : std::vector<size_t>{2, 3}) {
      for (size_t c = 8; c < 25; c += 5) {
        AvgPoolMicrokernelTester()
          .mr(9).n(count)
          .kh(side).kw(side).kc(c)
          .y_stride(31)
          .Test(xnn_q8_avgpool_ukernel_up9__neon);
      }
    }
  }
}
409 
// Runs square kh = kw configurations of size 2 and 3 for n = 2, 3, 4 and
// kc = 8, 13, 18, 23, with s ranging from 2 up to the configuration size.
TEST(Q8_AVGPOOL_UP9__NEON, small_n_with_s) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 2; count < 5; ++count) {
    for (const size_t side : std::vector<size_t>{2, 3}) {
      for (size_t c = 8; c < 25; c += 5) {
        for (size_t step = 2; step <= side; ++step) {
          AvgPoolMicrokernelTester()
            .mr(9).n(count)
            .kh(side).kw(side).kc(c)
            .s(step)
            .Test(xnn_q8_avgpool_ukernel_up9__neon);
        }
      }
    }
  }
}
429 
// Runs xnn_q8_avgpool_ukernel_mp9p8q__neon with kc = 8 for every (kh, kw)
// pair whose product equals mr() + qr().
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_eq_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).kc(8);
  const size_t size = tester.mr() + tester.qr();
  for (size_t h = 1; h <= size; ++h) {
    for (size_t w = 1; w <= size; ++w) {
      if (h * w != size) {
        continue;
      }
      tester.kh(h).kw(w).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
    }
  }
}
448 
// Runs xnn_q8_avgpool_ukernel_mp9p8q__neon with kc = 8 on (ks, 1) and (1, ks)
// configurations for every ks in [10, mr() + qr()).
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_eq_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).kc(8);
  const size_t limit = tester.mr() + tester.qr();
  for (size_t size = 10; size < limit; ++size) {
    tester.kh(size).kw(1).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
    tester.kh(1).kw(size).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
  }
}
466 
// Runs xnn_q8_avgpool_ukernel_mp9p8q__neon with kc = 8 for every (kh, kw)
// pair whose product is 25 or 49.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_eq_8_multipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t size : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).kc(8);
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        if (h * w != size) {
          continue;
        }
        tester.kh(h).kw(w).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
486 
// For each upper bound in {25, 49}, runs kc = 8 on (ks, 1) and (1, ks)
// configurations for every ks in [bound - qr() + 1, bound).
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_eq_8_multipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t bound : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).kc(8);
    const size_t first = bound - tester.qr() + 1;
    for (size_t size = first; size < bound; ++size) {
      tester.kh(size).kw(1).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      tester.kh(1).kw(size).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
    }
  }
}
506 
// Runs xnn_q8_avgpool_ukernel_mp9p8q__neon for kc = 8, 32, 56, 80, 104 with
// (kh, kw) = (ks, 1) and (1, ks), where ks = mr() + qr().
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .iterations(3);
  // Derived from the tester (9 + 8 = 17) instead of a hard-coded 17, so the
  // size tracks mr()/qr() the same way the other twopass fulltile tests do.
  const size_t ks = tester.mr() + tester.qr();
  for (size_t kc = 8; kc < 128; kc += 24) {
    tester
      .kc(kc)
      .kh(ks)
      .kw(1)
      .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
    tester
      .kc(kc)
      .kh(1)
      .kw(ks)
      .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
  }
}
527 
// Runs kc = 8, 32, 56, 80, 104 on (ks, 1) and (1, ks) configurations for
// every ks in [10, mr() + qr()).
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  const size_t limit = tester.mr() + tester.qr();
  for (size_t size = 10; size < limit; ++size) {
    for (size_t c = 8; c < 128; c += 24) {
      tester.kc(c).kh(size).kw(1).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      tester.kc(c).kh(1).kw(size).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
    }
  }
}
549 
// Runs kc = 8, 32, 56, 80, 104 with x_stride = 131 for every (kh, kw) pair
// whose product equals mr() + qr().
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_twopass_fulltile_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  const size_t size = tester.mr() + tester.qr();
  for (size_t h = 1; h <= size; ++h) {
    for (size_t w = 1; w <= size; ++w) {
      if (h * w != size) {
        continue;
      }
      for (size_t c = 8; c < 128; c += 24) {
        tester.kh(h).kw(w).kc(c).x_stride(131)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
572 
// Runs kc = 8, 32, 56, 80, 104 for every (kh, kw) pair whose product is 25
// or 49.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_multipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t size : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        if (h * w != size) {
          continue;
        }
        for (size_t c = 8; c < 128; c += 24) {
          tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
        }
      }
    }
  }
}
595 
// For each upper bound in {25, 49}, runs kc = 8, 32, 56, 80, 104 on (ks, 1)
// and (1, ks) configurations for every ks in [bound - qr() + 1, bound).
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_multipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t bound : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    const size_t first = bound - tester.qr() + 1;
    for (size_t size = first; size < bound; ++size) {
      for (size_t c = 8; c < 128; c += 24) {
        tester.kc(c).kh(size).kw(1).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
        tester.kc(c).kh(1).kw(size).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
619 
// Runs kc = 8, 32, 56, 80, 104 with x_stride = 131 for every (kh, kw) pair
// whose product is 25 or 49.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_multipass_fulltile_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t size : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        if (h * w != size) {
          continue;
        }
        for (size_t c = 8; c < 128; c += 24) {
          tester.kh(h).kw(w).kc(c).x_stride(131)
            .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
        }
      }
    }
  }
}
643 
// Runs kc = 1..7 for every (kh, kw) pair whose product equals mr() + qr().
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  const size_t size = tester.mr() + tester.qr();
  for (size_t h = 1; h <= size; ++h) {
    for (size_t w = 1; w <= size; ++w) {
      if (h * w != size) {
        continue;
      }
      for (size_t c = 1; c < 8; ++c) {
        tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
665 
// Runs kc = 1..7 on (ks, 1) and (1, ks) configurations for every ks in
// [10, mr() + qr()).
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  const size_t limit = tester.mr() + tester.qr();
  for (size_t size = 10; size < limit; ++size) {
    for (size_t c = 1; c < 8; ++c) {
      tester.kc(c).kh(size).kw(1).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      tester.kc(c).kh(1).kw(size).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
    }
  }
}
687 
// Runs kc = 1..7 with x_stride = 23 for every (kh, kw) pair whose product
// equals mr() + qr().
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_twopass_fulltile_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  const size_t size = tester.mr() + tester.qr();
  for (size_t h = 1; h <= size; ++h) {
    for (size_t w = 1; w <= size; ++w) {
      if (h * w != size) {
        continue;
      }
      for (size_t c = 1; c < 8; ++c) {
        tester.kh(h).kw(w).kc(c).x_stride(23)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
710 
// Runs kc = 1..7 for every (kh, kw) pair whose product is 25 or 49.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_multipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t size : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        if (h * w != size) {
          continue;
        }
        for (size_t c = 1; c < 8; ++c) {
          tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
        }
      }
    }
  }
}
733 
// For each upper bound in {25, 49}, runs kc = 1..7 on (ks, 1) and (1, ks)
// configurations for every ks in [bound - qr() + 1, bound).
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_multipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t bound : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    const size_t first = bound - tester.qr() + 1;
    for (size_t size = first; size < bound; ++size) {
      for (size_t c = 1; c < 8; ++c) {
        tester.kc(c).kh(size).kw(1).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
        tester.kc(c).kh(1).kw(size).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
757 
// Runs kc = 1..7 with x_stride = 23 for every (kh, kw) pair whose product is
// 25 or 49.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_lt_8_multipass_fulltile_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t size : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        if (h * w != size) {
          continue;
        }
        for (size_t c = 1; c < 8; ++c) {
          tester.kh(h).kw(w).kc(c).x_stride(23)
            .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
        }
      }
    }
  }
}
781 
// Runs kc = 9..15 for every (kh, kw) pair whose product equals mr() + qr().
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  const size_t size = tester.mr() + tester.qr();
  for (size_t h = 1; h <= size; ++h) {
    for (size_t w = 1; w <= size; ++w) {
      if (h * w != size) {
        continue;
      }
      for (size_t c = 9; c < 16; ++c) {
        tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
803 
// Runs kc = 9..15 on (ks, 1) and (1, ks) configurations for every ks in
// [10, mr() + qr()).
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  const size_t limit = tester.mr() + tester.qr();
  for (size_t size = 10; size < limit; ++size) {
    for (size_t c = 9; c < 16; ++c) {
      tester.kc(c).kh(size).kw(1).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      tester.kc(c).kh(1).kw(size).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
    }
  }
}
825 
// Runs kc = 9..15 with x_stride = 23 for every (kh, kw) pair whose product
// equals mr() + qr().
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_twopass_fulltile_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  const size_t size = tester.mr() + tester.qr();
  for (size_t h = 1; h <= size; ++h) {
    for (size_t w = 1; w <= size; ++w) {
      if (h * w != size) {
        continue;
      }
      for (size_t c = 9; c < 16; ++c) {
        tester.kh(h).kw(w).kc(c).x_stride(23)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
848 
// Runs kc = 9..15 for every (kh, kw) pair whose product is 25 or 49.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_multipass_fulltile) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t size : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        if (h * w != size) {
          continue;
        }
        for (size_t c = 9; c < 16; ++c) {
          tester.kh(h).kw(w).kc(c).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
        }
      }
    }
  }
}
871 
// For each upper bound in {25, 49}, runs kc = 9..15 on (ks, 1) and (1, ks)
// configurations for every ks in [bound - qr() + 1, bound).
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_multipass_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t bound : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    const size_t first = bound - tester.qr() + 1;
    for (size_t size = first; size < bound; ++size) {
      for (size_t c = 9; c < 16; ++c) {
        tester.kc(c).kh(size).kw(1).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
        tester.kc(c).kh(1).kw(size).Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
895 
// Runs kc = 9..15 with x_stride = 23 for every (kh, kw) pair whose product is
// 25 or 49.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_gt_8_multipass_fulltile_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (const size_t size : std::vector<size_t>{25, 49}) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    for (size_t h = 1; h <= size; ++h) {
      for (size_t w = 1; w <= size; ++w) {
        if (h * w != size) {
          continue;
        }
        for (size_t c = 9; c < 16; ++c) {
          tester.kh(h).kw(w).kc(c).x_stride(23)
            .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
        }
      }
    }
  }
}
919 
// Sweeps x_scale geometrically from 0.01 (multiplying by 3.14159265 while
// below 100) on a 5x5 configuration, for n = 1, 3, 5 and kc = 8, 32, ..., 104.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_x_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 1; count <= 5; count += 2) {
    for (size_t c = 8; c < 128; c += 24) {
      for (float scale = 0.01f; scale < 100.0f; scale *= 3.14159265f) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8).n(count)
          .kh(5).kw(5).kc(c)
          .x_scale(scale)
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
939 
// Sweeps x_zero_point over 0, 51, ..., 255 on a 5x5 configuration, for
// n = 1, 3, 5 and kc = 8, 32, ..., 104.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_x_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 1; count <= 5; count += 2) {
    for (size_t c = 8; c < 128; c += 24) {
      // int32_t counter so that the `<= 255` bound terminates the loop.
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8).n(count)
          .kh(5).kw(5).kc(c)
          .x_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
959 
// Sweeps y_scale geometrically from 0.01 (multiplying by 3.14159265 while
// below 100) on a 5x5 configuration, for n = 1, 3, 5 and kc = 8, 32, ..., 104.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_y_scale) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t count = 1; count <= 5; count += 2) {
    for (size_t c = 8; c < 128; c += 24) {
      for (float scale = 0.01f; scale < 100.0f; scale *= 3.14159265f) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8).n(count)
          .kh(5).kw(5).kc(c)
          .y_scale(scale)
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
979 
// Sweeps y_zero_point over 0, 51, ..., 255 for n in {1, 3, 5} and
// kc in {8, 32, 56, 80, 104}, with kh = kw = 5 and one iteration each.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_y_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n_size = 1; n_size <= 5; n_size += 2) {
    for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
      // The counter is kept as int32_t so that the <= 255 bound terminates.
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .qr(8)
          .n(n_size)
          .kh(5)
          .kw(5)
          .kc(kc_size)
          .y_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
999 
// Runs with qmax = 128, unit x/y scales and x/y zero points of 128, for
// n in {1, 3, 5} and kc in {8, 32, 56, 80, 104}; kh = kw = 5, three iterations.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n_size = 1; n_size <= 5; n_size += 2) {
    size_t kc_size = 8;
    while (kc_size < 128) {
      AvgPoolMicrokernelTester()
        .mr(9)
        .qr(8)
        .n(n_size)
        .kh(5)
        .kw(5)
        .kc(kc_size)
        .x_zero_point(128)
        .y_zero_point(128)
        .x_scale(1.0f)
        .y_scale(1.0f)
        .qmax(128)
        .iterations(3)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      kc_size += 24;
    }
  }
}
1021 
// Runs with qmin = 128, unit x/y scales and x/y zero points of 128, for
// n in {1, 3, 5} and kc in {8, 32, 56, 80, 104}; kh = kw = 5, three iterations.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, kc_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n_size = 1; n_size <= 5; n_size += 2) {
    size_t kc_size = 8;
    while (kc_size < 128) {
      AvgPoolMicrokernelTester()
        .mr(9)
        .qr(8)
        .n(n_size)
        .kh(5)
        .kw(5)
        .kc(kc_size)
        .x_zero_point(128)
        .y_zero_point(128)
        .x_scale(1.0f)
        .y_scale(1.0f)
        .qmin(128)
        .iterations(3)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      kc_size += 24;
    }
  }
}
1043 
// Covers n in {2, 3, 4} with square windows (kh = kw in {5, 7}) and
// kc in {8, 13, 18, 23}.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, small_n) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n_size = 2; n_size < 5; ++n_size) {
    const std::vector<size_t> sides{5, 7};
    for (const size_t side : sides) {
      for (size_t kc_size = 8; kc_size < 25; kc_size += 5) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .qr(8)
          .n(n_size)
          .kh(side)
          .kw(side)
          .kc(kc_size)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
1061 
// Same sweep as small_n (n in {2, 3, 4}, kh = kw in {5, 7},
// kc in {8, 13, 18, 23}) but with x_stride set to 29.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, small_n_with_x_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n_size = 2; n_size < 5; ++n_size) {
    const std::vector<size_t> sides{5, 7};
    for (const size_t side : sides) {
      for (size_t kc_size = 8; kc_size < 25; kc_size += 5) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .qr(8)
          .n(n_size)
          .kh(side)
          .kw(side)
          .kc(kc_size)
          .x_stride(29)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
1080 
// Same sweep as small_n (n in {2, 3, 4}, kh = kw in {5, 7},
// kc in {8, 13, 18, 23}) but with y_stride set to 31.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, small_n_with_y_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n_size = 2; n_size < 5; ++n_size) {
    const std::vector<size_t> sides{5, 7};
    for (const size_t side : sides) {
      for (size_t kc_size = 8; kc_size < 25; kc_size += 5) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .qr(8)
          .n(n_size)
          .kh(side)
          .kw(side)
          .kc(kc_size)
          .y_stride(31)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
      }
    }
  }
}
1099 
// Covers n in {2, 3, 4}, kh = kw in {5, 7}, s in {2, 3, 4, 5} and
// kc in {8, 13, 18, 23}; s is swept outside the kc loop.
TEST(Q8_AVGPOOL_MP9P8Q__NEON, small_n_with_s) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n_size = 2; n_size < 5; ++n_size) {
    const std::vector<size_t> sides{5, 7};
    for (const size_t side : sides) {
      for (size_t s_size = 2; s_size <= 5; ++s_size) {
        for (size_t kc_size = 8; kc_size < 25; kc_size += 5) {
          AvgPoolMicrokernelTester()
            .mr(9)
            .qr(8)
            .n(n_size)
            .kh(side)
            .kw(side)
            .kc(kc_size)
            .s(s_size)
            .Test(xnn_q8_avgpool_ukernel_mp9p8q__neon);
        }
      }
    }
  }
}
1120 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1121 
1122 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs with kc = 8 for every (kh, kw) pair whose product equals mr (9),
// visited in order of increasing kh.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_eq_8_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .kc(8);
  const size_t area = tester.mr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    tester
      .kh(rows)
      .kw(cols)
      .Test(xnn_q8_avgpool_ukernel_up9__sse2);
  }
}
1139 
// Runs with kc = 8 for every (kh, kw) pair whose product lies in [2, mr),
// i.e. 2 through 8 for mr = 9.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .kc(8);
  const size_t full_tile = tester.mr();
  for (size_t area = 2; area < full_tile; ++area) {
    for (size_t rows = 1; rows <= area; ++rows) {
      if (area % rows != 0) {
        continue;  // no kw in [1, area] satisfies rows * kw == area
      }
      const size_t cols = area / rows;
      tester
        .kh(rows)
        .kw(cols)
        .Test(xnn_q8_avgpool_ukernel_up9__sse2);
    }
  }
}
1158 
// For every (kh, kw) pair whose product equals mr (9), sweeps
// kc over {8, 32, 56, 80, 104}.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9);
  const size_t area = tester.mr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
      tester
        .kh(rows)
        .kw(cols)
        .kc(kc_size)
        .Test(xnn_q8_avgpool_ukernel_up9__sse2);
    }
  }
}
1177 
// For every (kh, kw) pair whose product lies in [2, mr), sweeps
// kc over {8, 32, 56, 80, 104}; the tester is configured with 3 iterations.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .iterations(3);
  const size_t full_tile = tester.mr();
  for (size_t area = 2; area < full_tile; ++area) {
    for (size_t rows = 1; rows <= area; ++rows) {
      if (area % rows != 0) {
        continue;  // no kw in [1, area] satisfies rows * kw == area
      }
      const size_t cols = area / rows;
      for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
        tester
          .kh(rows)
          .kw(cols)
          .kc(kc_size)
          .Test(xnn_q8_avgpool_ukernel_up9__sse2);
      }
    }
  }
}
1199 
// For every (kh, kw) pair whose product equals mr (9), sweeps
// kc over {8, 32, 56, 80, 104} with x_stride set to 131 (3 iterations).
TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_fulltile_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .iterations(3);
  const size_t area = tester.mr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
      tester
        .kh(rows)
        .kw(cols)
        .kc(kc_size)
        .x_stride(131)
        .Test(xnn_q8_avgpool_ukernel_up9__sse2);
    }
  }
}
1220 
// For every (kh, kw) pair whose product equals mr (9), sweeps kc over 1..7.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_lt_8_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9);
  const size_t area = tester.mr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    for (size_t kc_size = 1; kc_size < 8; ++kc_size) {
      tester
        .kh(rows)
        .kw(cols)
        .kc(kc_size)
        .Test(xnn_q8_avgpool_ukernel_up9__sse2);
    }
  }
}
1239 
// For every (kh, kw) pair whose product lies in [2, mr), sweeps kc over 1..7;
// the tester is configured with 3 iterations.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .iterations(3);
  const size_t full_tile = tester.mr();
  for (size_t area = 2; area < full_tile; ++area) {
    for (size_t rows = 1; rows <= area; ++rows) {
      if (area % rows != 0) {
        continue;  // no kw in [1, area] satisfies rows * kw == area
      }
      const size_t cols = area / rows;
      for (size_t kc_size = 1; kc_size < 8; ++kc_size) {
        tester
          .kh(rows)
          .kw(cols)
          .kc(kc_size)
          .Test(xnn_q8_avgpool_ukernel_up9__sse2);
      }
    }
  }
}
1261 
// For every (kh, kw) pair whose product equals mr (9), sweeps kc over 1..7
// with x_stride set to 23 (3 iterations).
TEST(Q8_AVGPOOL_UP9__SSE2, kc_lt_8_fulltile_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .iterations(3);
  const size_t area = tester.mr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    for (size_t kc_size = 1; kc_size < 8; ++kc_size) {
      tester
        .kh(rows)
        .kw(cols)
        .kc(kc_size)
        .x_stride(23)
        .Test(xnn_q8_avgpool_ukernel_up9__sse2);
    }
  }
}
1282 
// For every (kh, kw) pair whose product equals mr (9), sweeps kc over 9..15.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_gt_8_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9);
  const size_t area = tester.mr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    for (size_t kc_size = 9; kc_size < 16; ++kc_size) {
      tester
        .kh(rows)
        .kw(cols)
        .kc(kc_size)
        .Test(xnn_q8_avgpool_ukernel_up9__sse2);
    }
  }
}
1301 
// For every (kh, kw) pair whose product lies in [2, mr), sweeps kc over 9..15;
// the tester is configured with 3 iterations.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .iterations(3);
  const size_t full_tile = tester.mr();
  for (size_t area = 2; area < full_tile; ++area) {
    for (size_t rows = 1; rows <= area; ++rows) {
      if (area % rows != 0) {
        continue;  // no kw in [1, area] satisfies rows * kw == area
      }
      const size_t cols = area / rows;
      for (size_t kc_size = 9; kc_size < 16; ++kc_size) {
        tester
          .kh(rows)
          .kw(cols)
          .kc(kc_size)
          .Test(xnn_q8_avgpool_ukernel_up9__sse2);
      }
    }
  }
}
1323 
// For every (kh, kw) pair whose product equals mr (9), sweeps kc over 9..15
// with x_stride set to 23 (3 iterations).
TEST(Q8_AVGPOOL_UP9__SSE2, kc_gt_8_fulltile_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .iterations(3);
  const size_t area = tester.mr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    for (size_t kc_size = 9; kc_size < 16; ++kc_size) {
      tester
        .kh(rows)
        .kw(cols)
        .kc(kc_size)
        .x_stride(23)
        .Test(xnn_q8_avgpool_ukernel_up9__sse2);
    }
  }
}
1344 
// Sweeps x_scale geometrically (starting at 0.01 and multiplied by ~pi while it
// stays below 100) for n in {1, 3, 5} and kc in {8, 32, 56, 80, 104}, with
// kh = kw = 3 and two iterations per configuration.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_x_scale) {
  TEST_REQUIRES_X86_SSE2;
  const float scale_limit = 100.0f;
  const float scale_growth = 3.14159265f;
  for (size_t n_size = 1; n_size <= 5; n_size += 2) {
    for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
      float input_scale = 0.01f;
      while (input_scale < scale_limit) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(n_size)
          .kh(3)
          .kw(3)
          .kc(kc_size)
          .x_scale(input_scale)
          .iterations(2)
          .Test(xnn_q8_avgpool_ukernel_up9__sse2);
        input_scale *= scale_growth;
      }
    }
  }
}
1363 
// Sweeps x_zero_point over 0, 51, ..., 255 for n in {1, 3, 5} and
// kc in {8, 32, 56, 80, 104}, with kh = kw = 3 and three iterations each.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_x_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n_size = 1; n_size <= 5; n_size += 2) {
    for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
      // The counter is kept as int32_t so that the <= 255 bound terminates.
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(n_size)
          .kh(3)
          .kw(3)
          .kc(kc_size)
          .x_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(3)
          .Test(xnn_q8_avgpool_ukernel_up9__sse2);
      }
    }
  }
}
1382 
// Sweeps y_scale geometrically (starting at 0.01 and multiplied by ~pi while it
// stays below 100) for n in {1, 3, 5} and kc in {8, 32, 56, 80, 104}, with
// kh = kw = 3 and two iterations per configuration.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_y_scale) {
  TEST_REQUIRES_X86_SSE2;
  const float scale_limit = 100.0f;
  const float scale_growth = 3.14159265f;
  for (size_t n_size = 1; n_size <= 5; n_size += 2) {
    for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
      float output_scale = 0.01f;
      while (output_scale < scale_limit) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(n_size)
          .kh(3)
          .kw(3)
          .kc(kc_size)
          .y_scale(output_scale)
          .iterations(2)
          .Test(xnn_q8_avgpool_ukernel_up9__sse2);
        output_scale *= scale_growth;
      }
    }
  }
}
1401 
// Sweeps y_zero_point over 0, 51, ..., 255 for n in {1, 3, 5} and
// kc in {8, 32, 56, 80, 104}, with kh = kw = 3 and three iterations each.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n_size = 1; n_size <= 5; n_size += 2) {
    for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
      // The counter is kept as int32_t so that the <= 255 bound terminates.
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(n_size)
          .kh(3)
          .kw(3)
          .kc(kc_size)
          .y_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(3)
          .Test(xnn_q8_avgpool_ukernel_up9__sse2);
      }
    }
  }
}
1420 
// Runs with qmax = 128, unit x/y scales and x/y zero points of 128, for
// n in {1, 3, 5} and kc in {8, 32, 56, 80, 104}; kh = kw = 3.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n_size = 1; n_size <= 5; n_size += 2) {
    size_t kc_size = 8;
    while (kc_size < 128) {
      AvgPoolMicrokernelTester()
        .mr(9)
        .n(n_size)
        .kh(3)
        .kw(3)
        .kc(kc_size)
        .x_zero_point(128)
        .y_zero_point(128)
        .x_scale(1.0f)
        .y_scale(1.0f)
        .qmax(128)
        .Test(xnn_q8_avgpool_ukernel_up9__sse2);
      kc_size += 24;
    }
  }
}
1440 
// Runs with qmin = 128, unit x/y scales and x/y zero points of 128, for
// n in {1, 3, 5} and kc in {8, 32, 56, 80, 104}; kh = kw = 3.
TEST(Q8_AVGPOOL_UP9__SSE2, kc_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n_size = 1; n_size <= 5; n_size += 2) {
    size_t kc_size = 8;
    while (kc_size < 128) {
      AvgPoolMicrokernelTester()
        .mr(9)
        .n(n_size)
        .kh(3)
        .kw(3)
        .kc(kc_size)
        .x_zero_point(128)
        .y_zero_point(128)
        .x_scale(1.0f)
        .y_scale(1.0f)
        .qmin(128)
        .Test(xnn_q8_avgpool_ukernel_up9__sse2);
      kc_size += 24;
    }
  }
}
1460 
// Covers n in {2, 3, 4} with square windows (kh = kw in {2, 3}) and
// kc in {8, 13, 18, 23}.
TEST(Q8_AVGPOOL_UP9__SSE2, small_n) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n_size = 2; n_size < 5; ++n_size) {
    const std::vector<size_t> sides{2, 3};
    for (const size_t side : sides) {
      for (size_t kc_size = 8; kc_size < 25; kc_size += 5) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(n_size)
          .kh(side)
          .kw(side)
          .kc(kc_size)
          .Test(xnn_q8_avgpool_ukernel_up9__sse2);
      }
    }
  }
}
1477 
// Same sweep as small_n (n in {2, 3, 4}, kh = kw in {2, 3},
// kc in {8, 13, 18, 23}) but with x_stride set to 29.
TEST(Q8_AVGPOOL_UP9__SSE2, small_n_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n_size = 2; n_size < 5; ++n_size) {
    const std::vector<size_t> sides{2, 3};
    for (const size_t side : sides) {
      for (size_t kc_size = 8; kc_size < 25; kc_size += 5) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(n_size)
          .kh(side)
          .kw(side)
          .kc(kc_size)
          .x_stride(29)
          .Test(xnn_q8_avgpool_ukernel_up9__sse2);
      }
    }
  }
}
1495 
// Same sweep as small_n (n in {2, 3, 4}, kh = kw in {2, 3},
// kc in {8, 13, 18, 23}) but with y_stride set to 31.
TEST(Q8_AVGPOOL_UP9__SSE2, small_n_with_y_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n_size = 2; n_size < 5; ++n_size) {
    const std::vector<size_t> sides{2, 3};
    for (const size_t side : sides) {
      for (size_t kc_size = 8; kc_size < 25; kc_size += 5) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(n_size)
          .kh(side)
          .kw(side)
          .kc(kc_size)
          .y_stride(31)
          .Test(xnn_q8_avgpool_ukernel_up9__sse2);
      }
    }
  }
}
1513 
// Covers n in {2, 3, 4}, kh = kw in {2, 3}, kc in {8, 13, 18, 23}, and
// s from 2 up to (and including) the current window side.
TEST(Q8_AVGPOOL_UP9__SSE2, small_n_with_s) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t n_size = 2; n_size < 5; ++n_size) {
    const std::vector<size_t> sides{2, 3};
    for (const size_t side : sides) {
      for (size_t kc_size = 8; kc_size < 25; kc_size += 5) {
        for (size_t s_size = 2; s_size <= side; ++s_size) {
          AvgPoolMicrokernelTester()
            .mr(9)
            .n(n_size)
            .kh(side)
            .kw(side)
            .kc(kc_size)
            .s(s_size)
            .Test(xnn_q8_avgpool_ukernel_up9__sse2);
        }
      }
    }
  }
}
1533 
// Runs with kc = 8 for every (kh, kw) pair whose product equals mr + qr (17),
// visited in order of increasing kh.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_eq_8_twopass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .kc(8);
  const size_t area = tester.mr() + tester.qr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    tester
      .kh(rows)
      .kw(cols)
      .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
  }
}
1552 
// Runs with kc = 8 for window extents 10 .. mr + qr - 1 (10..16), testing an
// extent x 1 window followed by a 1 x extent window for each extent.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_eq_8_twopass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .kc(8);
  const size_t full_tile = tester.mr() + tester.qr();
  for (size_t extent = 10; extent < full_tile; ++extent) {
    // kh = extent, kw = 1
    tester
      .kh(extent)
      .kw(1)
      .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    // kh = 1, kw = extent
    tester
      .kh(1)
      .kw(extent)
      .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
  }
}
1570 
// Runs with kc = 8 for every (kh, kw) pair whose product is 25, then every
// pair whose product is 49; a fresh tester is built for each product.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_eq_8_multipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  const std::vector<size_t> areas{25, 49};
  for (const size_t area : areas) {
    auto tester = AvgPoolMicrokernelTester()
      .mr(9)
      .qr(8)
      .kc(8);
    for (size_t rows = 1; rows <= area; ++rows) {
      if (area % rows != 0) {
        continue;  // no kw in [1, area] satisfies rows * kw == area
      }
      const size_t cols = area / rows;
      tester
        .kh(rows)
        .kw(cols)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
1590 
// Runs with kc = 8 for window extents in [limit - qr + 1, limit) with
// limit in {25, 49} (18..24 and 42..48), testing an extent x 1 window followed
// by a 1 x extent window. A fresh tester is built for each limit.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_eq_8_multipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const std::vector<size_t> limits{25, 49};
  for (const size_t limit : limits) {
    auto tester = AvgPoolMicrokernelTester()
      .mr(9)
      .qr(8)
      .kc(8);
    for (size_t extent = limit - tester.qr() + 1; extent < limit; ++extent) {
      // kh = extent, kw = 1
      tester
        .kh(extent)
        .kw(1)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      // kh = 1, kw = extent
      tester
        .kh(1)
        .kw(extent)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
1610 
// Sweeps kc over {8, 32, 56, 80, 104} with a fixed window extent of 17
// (matching mr + qr = 9 + 8), testing a 17 x 1 window followed by a 1 x 17
// window for each kc.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_twopass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .iterations(3);
  const size_t extent = 17;
  size_t kc_size = 8;
  while (kc_size < 128) {
    // kh = extent, kw = 1
    tester
      .kc(kc_size)
      .kh(extent)
      .kw(1)
      .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    // kh = 1, kw = extent
    tester
      .kc(kc_size)
      .kh(1)
      .kw(extent)
      .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    kc_size += 24;
  }
}
1631 
// For window extents 10 .. mr + qr - 1 (10..16), sweeps kc over
// {8, 32, 56, 80, 104}, testing an extent x 1 window followed by a
// 1 x extent window for each combination.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_twopass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .iterations(3);
  const size_t full_tile = tester.mr() + tester.qr();
  for (size_t extent = 10; extent < full_tile; ++extent) {
    for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
      // kh = extent, kw = 1
      tester
        .kc(kc_size)
        .kh(extent)
        .kw(1)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      // kh = 1, kw = extent
      tester
        .kc(kc_size)
        .kh(1)
        .kw(extent)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
1653 
// For every (kh, kw) pair whose product equals mr + qr (17), sweeps
// kc over {8, 32, 56, 80, 104} with x_stride set to 131 (3 iterations).
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_twopass_fulltile_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .iterations(3);
  const size_t area = tester.mr() + tester.qr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
      tester
        .kh(rows)
        .kw(cols)
        .kc(kc_size)
        .x_stride(131)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
1676 
// For every (kh, kw) pair whose product is 25, then 49, sweeps
// kc over {8, 32, 56, 80, 104}; a fresh tester (3 iterations) is built for
// each product.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_multipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  const std::vector<size_t> areas{25, 49};
  for (const size_t area : areas) {
    auto tester = AvgPoolMicrokernelTester()
      .mr(9)
      .qr(8)
      .iterations(3);
    for (size_t rows = 1; rows <= area; ++rows) {
      if (area % rows != 0) {
        continue;  // no kw in [1, area] satisfies rows * kw == area
      }
      const size_t cols = area / rows;
      for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
        tester
          .kh(rows)
          .kw(cols)
          .kc(kc_size)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
1699 
// For window extents in [limit - qr + 1, limit) with limit in {25, 49},
// sweeps kc over {8, 32, 56, 80, 104}, testing an extent x 1 window followed
// by a 1 x extent window. A fresh tester (3 iterations) is built per limit.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_multipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const std::vector<size_t> limits{25, 49};
  for (const size_t limit : limits) {
    auto tester = AvgPoolMicrokernelTester()
      .mr(9)
      .qr(8)
      .iterations(3);
    for (size_t extent = limit - tester.qr() + 1; extent < limit; ++extent) {
      for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
        // kh = extent, kw = 1
        tester
          .kc(kc_size)
          .kh(extent)
          .kw(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
        // kh = 1, kw = extent
        tester
          .kc(kc_size)
          .kh(1)
          .kw(extent)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
1723 
// For every (kh, kw) pair whose product is 25, then 49, sweeps
// kc over {8, 32, 56, 80, 104} with x_stride set to 131; a fresh tester
// (3 iterations) is built for each product.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_multipass_fulltile_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  const std::vector<size_t> areas{25, 49};
  for (const size_t area : areas) {
    auto tester = AvgPoolMicrokernelTester()
      .mr(9)
      .qr(8)
      .iterations(3);
    for (size_t rows = 1; rows <= area; ++rows) {
      if (area % rows != 0) {
        continue;  // no kw in [1, area] satisfies rows * kw == area
      }
      const size_t cols = area / rows;
      for (size_t kc_size = 8; kc_size < 128; kc_size += 24) {
        tester
          .kh(rows)
          .kw(cols)
          .kc(kc_size)
          .x_stride(131)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
1747 
// For every (kh, kw) pair whose product equals mr + qr (17), sweeps
// kc over 1..7 (3 iterations).
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_twopass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .iterations(3);
  const size_t area = tester.mr() + tester.qr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    for (size_t kc_size = 1; kc_size < 8; ++kc_size) {
      tester
        .kh(rows)
        .kw(cols)
        .kc(kc_size)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
1769 
// For window extents 10 .. mr + qr - 1 (10..16), sweeps kc over 1..7,
// testing an extent x 1 window followed by a 1 x extent window.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_twopass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .iterations(3);
  const size_t full_tile = tester.mr() + tester.qr();
  for (size_t extent = 10; extent < full_tile; ++extent) {
    for (size_t kc_size = 1; kc_size < 8; ++kc_size) {
      // kh = extent, kw = 1
      tester
        .kc(kc_size)
        .kh(extent)
        .kw(1)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      // kh = 1, kw = extent
      tester
        .kc(kc_size)
        .kh(1)
        .kw(extent)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
1791 
// For every (kh, kw) pair whose product equals mr + qr (17), sweeps
// kc over 1..7 with x_stride set to 23 (3 iterations).
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_twopass_fulltile_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .iterations(3);
  const size_t area = tester.mr() + tester.qr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    for (size_t kc_size = 1; kc_size < 8; ++kc_size) {
      tester
        .kh(rows)
        .kw(cols)
        .kc(kc_size)
        .x_stride(23)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
1814 
// For every (kh, kw) pair whose product is 25, then 49, sweeps kc over 1..7;
// a fresh tester (3 iterations) is built for each product.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_multipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  const std::vector<size_t> areas{25, 49};
  for (const size_t area : areas) {
    auto tester = AvgPoolMicrokernelTester()
      .mr(9)
      .qr(8)
      .iterations(3);
    for (size_t rows = 1; rows <= area; ++rows) {
      if (area % rows != 0) {
        continue;  // no kw in [1, area] satisfies rows * kw == area
      }
      const size_t cols = area / rows;
      for (size_t kc_size = 1; kc_size < 8; ++kc_size) {
        tester
          .kh(rows)
          .kw(cols)
          .kc(kc_size)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
1837 
// For window extents in [limit - qr + 1, limit) with limit in {25, 49},
// sweeps kc over 1..7, testing an extent x 1 window followed by a
// 1 x extent window. A fresh tester (3 iterations) is built per limit.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_multipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  const std::vector<size_t> limits{25, 49};
  for (const size_t limit : limits) {
    auto tester = AvgPoolMicrokernelTester()
      .mr(9)
      .qr(8)
      .iterations(3);
    for (size_t extent = limit - tester.qr() + 1; extent < limit; ++extent) {
      for (size_t kc_size = 1; kc_size < 8; ++kc_size) {
        // kh = extent, kw = 1
        tester
          .kc(kc_size)
          .kh(extent)
          .kw(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
        // kh = 1, kw = extent
        tester
          .kc(kc_size)
          .kh(1)
          .kw(extent)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
1861 
// For every (kh, kw) pair whose product is 25, then 49, sweeps kc over 1..7
// with x_stride set to 23; a fresh tester (3 iterations) is built for each
// product.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_lt_8_multipass_fulltile_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  const std::vector<size_t> areas{25, 49};
  for (const size_t area : areas) {
    auto tester = AvgPoolMicrokernelTester()
      .mr(9)
      .qr(8)
      .iterations(3);
    for (size_t rows = 1; rows <= area; ++rows) {
      if (area % rows != 0) {
        continue;  // no kw in [1, area] satisfies rows * kw == area
      }
      const size_t cols = area / rows;
      for (size_t kc_size = 1; kc_size < 8; ++kc_size) {
        tester
          .kh(rows)
          .kw(cols)
          .kc(kc_size)
          .x_stride(23)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
1885 
// For every (kh, kw) pair whose product equals mr + qr (17), sweeps
// kc over 9..15 (3 iterations).
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_twopass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .iterations(3);
  const size_t area = tester.mr() + tester.qr();
  for (size_t rows = 1; rows <= area; ++rows) {
    if (area % rows != 0) {
      continue;  // no kw in [1, area] satisfies rows * kw == area
    }
    const size_t cols = area / rows;
    for (size_t kc_size = 9; kc_size < 16; ++kc_size) {
      tester
        .kh(rows)
        .kw(cols)
        .kc(kc_size)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
1907 
// For window extents 10 .. mr + qr - 1 (10..16), sweeps kc over 9..15,
// testing an extent x 1 window followed by a 1 x extent window.
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_twopass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester()
    .mr(9)
    .qr(8)
    .iterations(3);
  const size_t full_tile = tester.mr() + tester.qr();
  for (size_t extent = 10; extent < full_tile; ++extent) {
    for (size_t kc_size = 9; kc_size < 16; ++kc_size) {
      // kh = extent, kw = 1
      tester
        .kc(kc_size)
        .kh(extent)
        .kw(1)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      // kh = 1, kw = extent
      tester
        .kc(kc_size)
        .kh(1)
        .kw(extent)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
1929 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_twopass_fulltile_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  // The pooling size is mr() + qr(); every kh x kw factorization of it is run
  // with kc in [9, 15] and x_stride fixed at 23.
  const size_t pooling_size = tester.mr() + tester.qr();
  for (size_t height = 1; height <= pooling_size; ++height) {
    if (pooling_size % height != 0) {
      continue;  // no kw satisfies kh * kw == pooling_size for this kh
    }
    const size_t width = pooling_size / height;
    for (size_t channels = 9; channels < 16; ++channels) {
      tester
        .kh(height)
        .kw(width)
        .kc(channels)
        .x_stride(23)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
1952 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_multipass_fulltile) {
  TEST_REQUIRES_X86_SSE2;
  // Pooling sizes 25 and 49: every kh x kw factorization of each size is run
  // with kc in [9, 15].
  const std::vector<size_t> pooling_sizes{25, 49};
  for (const size_t pooling_size : pooling_sizes) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    for (size_t height = 1; height <= pooling_size; ++height) {
      if (pooling_size % height != 0) {
        continue;  // no kw satisfies kh * kw == pooling_size for this kh
      }
      const size_t width = pooling_size / height;
      for (size_t channels = 9; channels < 16; ++channels) {
        tester
          .kh(height)
          .kw(width)
          .kc(channels)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
1975 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_multipass_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // For each upper bound (25, 49) the pooling sizes in
  // [bound - qr() + 1, bound) are run as ks x 1 and then 1 x ks windows,
  // with kc in [9, 15].
  const std::vector<size_t> pooling_size_bounds{25, 49};
  for (const size_t pooling_size_bound : pooling_size_bounds) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    // Applies kc, kh, kw (in that order) to the tester and runs the kernel.
    const auto run = [&tester](size_t channels, size_t height, size_t width) {
      tester
        .kc(channels)
        .kh(height)
        .kw(width)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    };
    const size_t first_pooling_size = pooling_size_bound - tester.qr() + 1;
    for (size_t pooling_size = first_pooling_size; pooling_size < pooling_size_bound; ++pooling_size) {
      for (size_t channels = 9; channels < 16; ++channels) {
        run(channels, pooling_size, 1);
        run(channels, 1, pooling_size);
      }
    }
  }
}
1999 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_gt_8_multipass_fulltile_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  // Pooling sizes 25 and 49: every kh x kw factorization of each size is run
  // with kc in [9, 15] and x_stride fixed at 23.
  const std::vector<size_t> pooling_sizes{25, 49};
  for (const size_t pooling_size : pooling_sizes) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    for (size_t height = 1; height <= pooling_size; ++height) {
      if (pooling_size % height != 0) {
        continue;  // no kw satisfies kh * kw == pooling_size for this kh
      }
      const size_t width = pooling_size / height;
      for (size_t channels = 9; channels < 16; ++channels) {
        tester
          .kh(height)
          .kw(width)
          .kc(channels)
          .x_stride(23)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
2023 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_x_scale) {
  TEST_REQUIRES_X86_SSE2;
  // 5x5 window; n in {1, 3, 5}, kc in {8, 32, 56, 80, 104}, and x_scale
  // growing geometrically from 0.01 while it stays below 100.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 8; channels < 128; channels += 24) {
      for (float scale = 0.01f; scale < 100.0f; scale *= 3.14159265f) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(5).kw(5).kc(channels)
          .x_scale(scale)
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
2043 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_x_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // 5x5 window; n in {1, 3, 5}, kc in {8, 32, 56, 80, 104}, and
  // x_zero_point stepping through 0, 51, ..., 255.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 8; channels < 128; channels += 24) {
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(5).kw(5).kc(channels)
          .x_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
2063 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_y_scale) {
  TEST_REQUIRES_X86_SSE2;
  // 5x5 window; n in {1, 3, 5}, kc in {8, 32, 56, 80, 104}, and y_scale
  // growing geometrically from 0.01 while it stays below 100.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 8; channels < 128; channels += 24) {
      for (float scale = 0.01f; scale < 100.0f; scale *= 3.14159265f) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(5).kw(5).kc(channels)
          .y_scale(scale)
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
2083 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_y_zero_point) {
  TEST_REQUIRES_X86_SSE2;
  // 5x5 window; n in {1, 3, 5}, kc in {8, 32, 56, 80, 104}, and
  // y_zero_point stepping through 0, 51, ..., 255.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 8; channels < 128; channels += 24) {
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(5).kw(5).kc(channels)
          .y_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
2103 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_qmax) {
  TEST_REQUIRES_X86_SSE2;
  // 5x5 window with both zero points at 128, both scales at 1.0 and qmax at
  // 128; n in {1, 3, 5}, kc in {8, 32, 56, 80, 104}.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 8; channels < 128; channels += 24) {
      AvgPoolMicrokernelTester()
        .mr(9).qr(8)
        .n(num)
        .kh(5).kw(5).kc(channels)
        .x_zero_point(128).y_zero_point(128)
        .x_scale(1.0f).y_scale(1.0f)
        .qmax(128)
        .iterations(3)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
2125 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, kc_div_8_with_qmin) {
  TEST_REQUIRES_X86_SSE2;
  // 5x5 window with both zero points at 128, both scales at 1.0 and qmin at
  // 128; n in {1, 3, 5}, kc in {8, 32, 56, 80, 104}.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 8; channels < 128; channels += 24) {
      AvgPoolMicrokernelTester()
        .mr(9).qr(8)
        .n(num)
        .kh(5).kw(5).kc(channels)
        .x_zero_point(128).y_zero_point(128)
        .x_scale(1.0f).y_scale(1.0f)
        .qmin(128)
        .iterations(3)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
    }
  }
}
2147 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, small_n) {
  TEST_REQUIRES_X86_SSE2;
  // Square 5x5 and 7x7 windows; n in [2, 4], kc in {8, 13, 18, 23}.
  const std::vector<size_t> window_sides{5, 7};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t channels = 8; channels < 25; channels += 5) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(side).kw(side).kc(channels)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
2165 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, small_n_with_x_stride) {
  TEST_REQUIRES_X86_SSE2;
  // Square 5x5 and 7x7 windows with x_stride fixed at 29; n in [2, 4],
  // kc in {8, 13, 18, 23}.
  const std::vector<size_t> window_sides{5, 7};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t channels = 8; channels < 25; channels += 5) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(side).kw(side).kc(channels)
          .x_stride(29)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
2184 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, small_n_with_y_stride) {
  TEST_REQUIRES_X86_SSE2;
  // Square 5x5 and 7x7 windows with y_stride fixed at 31; n in [2, 4],
  // kc in {8, 13, 18, 23}.
  const std::vector<size_t> window_sides{5, 7};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t channels = 8; channels < 25; channels += 5) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(side).kw(side).kc(channels)
          .y_stride(31)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
      }
    }
  }
}
2203 
TEST(Q8_AVGPOOL_MP9P8Q__SSE2, small_n_with_s) {
  TEST_REQUIRES_X86_SSE2;
  // Square 5x5 and 7x7 windows; n in [2, 4], s in [2, 5],
  // kc in {8, 13, 18, 23}.
  const std::vector<size_t> window_sides{5, 7};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t step = 2; step <= 5; ++step) {
        for (size_t channels = 8; channels < 25; channels += 5) {
          AvgPoolMicrokernelTester()
            .mr(9).qr(8)
            .n(num)
            .kh(side).kw(side).kc(channels)
            .s(step)
            .Test(xnn_q8_avgpool_ukernel_mp9p8q__sse2);
        }
      }
    }
  }
}
2224 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2225 
TEST(Q8_AVGPOOL_UP9__SCALAR, kc_eq_1_fulltile) {
  auto tester = AvgPoolMicrokernelTester().mr(9).kc(1);
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Every kh x kw factorization of mr() is run with kc fixed at 1.
  const size_t pooling_size = tester.mr();
  for (size_t height = 1; height <= pooling_size; ++height) {
    if (pooling_size % height != 0) {
      continue;  // no kw satisfies kh * kw == pooling_size for this kh
    }
    tester
      .kh(height)
      .kw(pooling_size / height)
      .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
  }
}
2241 
TEST(Q8_AVGPOOL_UP9__SCALAR, kc_eq_1_subtile) {
  auto tester = AvgPoolMicrokernelTester().mr(9).kc(1);
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Pooling sizes from 2 up to, but not including, mr(); every kh x kw
  // factorization of each size is run with kc fixed at 1.
  const size_t pooling_size_end = tester.mr();
  for (size_t pooling_size = 2; pooling_size < pooling_size_end; ++pooling_size) {
    for (size_t height = 1; height <= pooling_size; ++height) {
      if (pooling_size % height != 0) {
        continue;  // no kw satisfies kh * kw == pooling_size for this kh
      }
      tester
        .kh(height)
        .kw(pooling_size / height)
        .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
    }
  }
}
2259 
TEST(Q8_AVGPOOL_UP9__SCALAR, kc_gt_1_fulltile) {
  auto tester = AvgPoolMicrokernelTester().mr(9);
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Every kh x kw factorization of mr() is run with kc in [2, 7].
  const size_t pooling_size = tester.mr();
  for (size_t height = 1; height <= pooling_size; ++height) {
    if (pooling_size % height != 0) {
      continue;  // no kw satisfies kh * kw == pooling_size for this kh
    }
    const size_t width = pooling_size / height;
    for (size_t channels = 2; channels < 8; ++channels) {
      tester
        .kh(height)
        .kw(width)
        .kc(channels)
        .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
    }
  }
}
2277 
TEST(Q8_AVGPOOL_UP9__SCALAR, kc_gt_1_subtile) {
  auto tester = AvgPoolMicrokernelTester().mr(9).iterations(3);
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Pooling sizes from 2 up to, but not including, mr(); every kh x kw
  // factorization of each size is run with kc in [2, 7].
  const size_t pooling_size_end = tester.mr();
  for (size_t pooling_size = 2; pooling_size < pooling_size_end; ++pooling_size) {
    for (size_t height = 1; height <= pooling_size; ++height) {
      if (pooling_size % height != 0) {
        continue;  // no kw satisfies kh * kw == pooling_size for this kh
      }
      const size_t width = pooling_size / height;
      for (size_t channels = 2; channels < 8; ++channels) {
        tester
          .kh(height)
          .kw(width)
          .kc(channels)
          .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
      }
    }
  }
}
2298 
TEST(Q8_AVGPOOL_UP9__SCALAR, kc_gt_1_fulltile_with_x_stride) {
  auto tester = AvgPoolMicrokernelTester().mr(9).iterations(3);
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Every kh x kw factorization of mr() is run with kc in [2, 7] and
  // x_stride fixed at 23.
  const size_t pooling_size = tester.mr();
  for (size_t height = 1; height <= pooling_size; ++height) {
    if (pooling_size % height != 0) {
      continue;  // no kw satisfies kh * kw == pooling_size for this kh
    }
    const size_t width = pooling_size / height;
    for (size_t channels = 2; channels < 8; ++channels) {
      tester
        .kh(height)
        .kw(width)
        .kc(channels)
        .x_stride(23)
        .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
    }
  }
}
2318 
TEST(Q8_AVGPOOL_UP9__SCALAR, x_scale) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 3x3 window; n in {1, 3, 5}, kc in {1, 4, 7}, and x_scale growing
  // geometrically from 0.01 while it stays below 100.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (float scale = 0.01f; scale < 100.0f; scale *= 3.14159265f) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(num)
          .kh(3).kw(3).kc(channels)
          .x_scale(scale)
          .iterations(2)
          .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
      }
    }
  }
}
2336 
TEST(Q8_AVGPOOL_UP9__SCALAR, x_zero_point) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 3x3 window; n in {1, 3, 5}, kc in {1, 4, 7}, and x_zero_point stepping
  // through 0, 51, ..., 255.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(num)
          .kh(3).kw(3).kc(channels)
          .x_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(3)
          .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
      }
    }
  }
}
2354 
TEST(Q8_AVGPOOL_UP9__SCALAR, y_scale) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 3x3 window; n in {1, 3, 5}, kc in {1, 4, 7}, and y_scale growing
  // geometrically from 0.01 while it stays below 100.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (float scale = 0.01f; scale < 100.0f; scale *= 3.14159265f) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(num)
          .kh(3).kw(3).kc(channels)
          .y_scale(scale)
          .iterations(2)
          .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
      }
    }
  }
}
2372 
TEST(Q8_AVGPOOL_UP9__SCALAR, y_zero_point) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 3x3 window; n in {1, 3, 5}, kc in {1, 4, 7}, and y_zero_point stepping
  // through 0, 51, ..., 255.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(num)
          .kh(3).kw(3).kc(channels)
          .y_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(3)
          .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
      }
    }
  }
}
2390 
TEST(Q8_AVGPOOL_UP9__SCALAR, qmax) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 3x3 window with both zero points at 128, both scales at 1.0 and qmax at
  // 128; n in {1, 3, 5}, kc in {1, 4, 7}.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      AvgPoolMicrokernelTester()
        .mr(9)
        .n(num)
        .kh(3).kw(3).kc(channels)
        .x_zero_point(128).y_zero_point(128)
        .x_scale(1.0f).y_scale(1.0f)
        .qmax(128)
        .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
    }
  }
}
2409 
TEST(Q8_AVGPOOL_UP9__SCALAR, qmin) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 3x3 window with both zero points at 128, both scales at 1.0 and qmin at
  // 128; n in {1, 3, 5}, kc in {1, 4, 7}.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      AvgPoolMicrokernelTester()
        .mr(9)
        .n(num)
        .kh(3).kw(3).kc(channels)
        .x_zero_point(128).y_zero_point(128)
        .x_scale(1.0f).y_scale(1.0f)
        .qmin(128)
        .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
    }
  }
}
2428 
TEST(Q8_AVGPOOL_UP9__SCALAR, small_n) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Square 2x2 and 3x3 windows; n in [2, 4], kc in {1, 4, 7}.
  const std::vector<size_t> window_sides{2, 3};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t channels = 1; channels < 8; channels += 3) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(num)
          .kh(side).kw(side).kc(channels)
          .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
      }
    }
  }
}
2444 
TEST(Q8_AVGPOOL_UP9__SCALAR, small_n_with_x_stride) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Square 2x2 and 3x3 windows with x_stride fixed at 29; n in [2, 4],
  // kc in {1, 4, 7}.
  const std::vector<size_t> window_sides{2, 3};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t channels = 1; channels < 8; channels += 3) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(num)
          .kh(side).kw(side).kc(channels)
          .x_stride(29)
          .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
      }
    }
  }
}
2461 
TEST(Q8_AVGPOOL_UP9__SCALAR, small_n_with_y_stride) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Square 2x2 and 3x3 windows with y_stride fixed at 31; n in [2, 4],
  // kc in {1, 4, 7}.
  const std::vector<size_t> window_sides{2, 3};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t channels = 1; channels < 8; channels += 3) {
        AvgPoolMicrokernelTester()
          .mr(9)
          .n(num)
          .kh(side).kw(side).kc(channels)
          .y_stride(31)
          .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
      }
    }
  }
}
2478 
TEST(Q8_AVGPOOL_UP9__SCALAR, small_n_with_s) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Square 2x2 and 3x3 windows; n in [2, 4], kc in {1, 4, 7}, and s running
  // from 2 up to the window side inclusive.
  const std::vector<size_t> window_sides{2, 3};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t channels = 1; channels < 8; channels += 3) {
        for (size_t step = 2; step <= side; ++step) {
          AvgPoolMicrokernelTester()
            .mr(9)
            .n(num)
            .kh(side).kw(side).kc(channels)
            .s(step)
            .Test(xnn_q8_avgpool_ukernel_up9__scalar, variant);
        }
      }
    }
  }
}
2497 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_eq_1_twopass_fulltile) {
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).kc(1);
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // The pooling size is mr() + qr(); every kh x kw factorization of it is run
  // with kc fixed at 1.
  const size_t pooling_size = tester.mr() + tester.qr();
  for (size_t height = 1; height <= pooling_size; ++height) {
    if (pooling_size % height != 0) {
      continue;  // no kw satisfies kh * kw == pooling_size for this kh
    }
    tester
      .kh(height)
      .kw(pooling_size / height)
      .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
  }
}
2515 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_eq_1_twopass_subtile) {
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).kc(1);
  // Applies kh then kw to the shared tester and runs the scalar kernel.
  const auto run = [&tester](size_t height, size_t width) {
    tester
      .kh(height)
      .kw(width)
      .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
  };
  // Pooling sizes from 10 up to, but not including, mr() + qr(); each is run
  // as a ks x 1 window and then as a 1 x ks window, with kc fixed at 1.
  const size_t pooling_size_end = tester.mr() + tester.qr();
  for (size_t pooling_size = 10; pooling_size < pooling_size_end; ++pooling_size) {
    run(pooling_size, 1);
    run(1, pooling_size);
  }
}
2532 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_eq_1_multipass_fulltile) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Pooling sizes 25 and 49: every kh x kw factorization of each size is run
  // with kc fixed at 1.
  const std::vector<size_t> pooling_sizes{25, 49};
  for (const size_t pooling_size : pooling_sizes) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).kc(1);
    for (size_t height = 1; height <= pooling_size; ++height) {
      if (pooling_size % height != 0) {
        continue;  // no kw satisfies kh * kw == pooling_size for this kh
      }
      tester
        .kh(height)
        .kw(pooling_size / height)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
    }
  }
}
2551 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_eq_1_multipass_subtile) {
  // For each upper bound (25, 49) the pooling sizes in
  // [bound - qr() + 1, bound) are run as ks x 1 and then 1 x ks windows,
  // with kc fixed at 1.
  const std::vector<size_t> pooling_size_bounds{25, 49};
  for (const size_t pooling_size_bound : pooling_size_bounds) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).kc(1);
    // Applies kh then kw to the tester and runs the scalar kernel.
    const auto run = [&tester](size_t height, size_t width) {
      tester
        .kh(height)
        .kw(width)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
    };
    const size_t first_pooling_size = pooling_size_bound - tester.qr() + 1;
    for (size_t pooling_size = first_pooling_size; pooling_size < pooling_size_bound; ++pooling_size) {
      run(pooling_size, 1);
      run(1, pooling_size);
    }
  }
}
2570 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_twopass_fulltile) {
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // The pooling size is mr() + qr(); every kh x kw factorization of it is run
  // with kc in [2, 7].
  const size_t pooling_size = tester.mr() + tester.qr();
  for (size_t height = 1; height <= pooling_size; ++height) {
    if (pooling_size % height != 0) {
      continue;  // no kw satisfies kh * kw == pooling_size for this kh
    }
    const size_t width = pooling_size / height;
    for (size_t channels = 2; channels < 8; ++channels) {
      tester
        .kh(height)
        .kw(width)
        .kc(channels)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
    }
  }
}
2591 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_twopass_subtile) {
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  // Applies kc, kh, kw (in that order) to the shared tester and runs the
  // scalar kernel.
  const auto run = [&tester](size_t channels, size_t height, size_t width) {
    tester
      .kc(channels)
      .kh(height)
      .kw(width)
      .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
  };
  // Pooling sizes from 10 up to, but not including, mr() + qr(); each is run
  // as a ks x 1 window and then as a 1 x ks window, for kc in [2, 7].
  const size_t pooling_size_end = tester.mr() + tester.qr();
  for (size_t pooling_size = 10; pooling_size < pooling_size_end; ++pooling_size) {
    for (size_t channels = 2; channels < 8; ++channels) {
      run(channels, pooling_size, 1);
      run(channels, 1, pooling_size);
    }
  }
}
2612 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_twopass_fulltile_with_x_stride) {
  auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // The pooling size is mr() + qr(); every kh x kw factorization of it is run
  // with kc in [2, 7] and x_stride fixed at 23.
  const size_t pooling_size = tester.mr() + tester.qr();
  for (size_t height = 1; height <= pooling_size; ++height) {
    if (pooling_size % height != 0) {
      continue;  // no kw satisfies kh * kw == pooling_size for this kh
    }
    const size_t width = pooling_size / height;
    for (size_t channels = 2; channels < 8; ++channels) {
      tester
        .kh(height)
        .kw(width)
        .kc(channels)
        .x_stride(23)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
    }
  }
}
2634 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_multipass_fulltile) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Pooling sizes 25 and 49: every kh x kw factorization of each size is run
  // with kc in [2, 7].
  const std::vector<size_t> pooling_sizes{25, 49};
  for (const size_t pooling_size : pooling_sizes) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    for (size_t height = 1; height <= pooling_size; ++height) {
      if (pooling_size % height != 0) {
        continue;  // no kw satisfies kh * kw == pooling_size for this kh
      }
      const size_t width = pooling_size / height;
      for (size_t channels = 2; channels < 8; ++channels) {
        tester
          .kh(height)
          .kw(width)
          .kc(channels)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
      }
    }
  }
}
2656 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_multipass_subtile) {
  // For each upper bound (25, 49) the pooling sizes in
  // [bound - qr() + 1, bound) are run as ks x 1 and then 1 x ks windows,
  // with kc in [2, 7].
  const std::vector<size_t> pooling_size_bounds{25, 49};
  for (const size_t pooling_size_bound : pooling_size_bounds) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    // Applies kc, kh, kw (in that order) to the tester and runs the scalar
    // kernel.
    const auto run = [&tester](size_t channels, size_t height, size_t width) {
      tester
        .kc(channels)
        .kh(height)
        .kw(width)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, AvgPoolMicrokernelTester::Variant::Scalar);
    };
    const size_t first_pooling_size = pooling_size_bound - tester.qr() + 1;
    for (size_t pooling_size = first_pooling_size; pooling_size < pooling_size_bound; ++pooling_size) {
      for (size_t channels = 2; channels < 8; ++channels) {
        run(channels, pooling_size, 1);
        run(channels, 1, pooling_size);
      }
    }
  }
}
2679 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, kc_gt_1_multipass_fulltile_with_x_stride) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Pooling sizes 25 and 49: every kh x kw factorization of each size is run
  // with kc in [2, 7] and x_stride fixed at 23.
  const std::vector<size_t> pooling_sizes{25, 49};
  for (const size_t pooling_size : pooling_sizes) {
    auto tester = AvgPoolMicrokernelTester().mr(9).qr(8).iterations(3);
    for (size_t height = 1; height <= pooling_size; ++height) {
      if (pooling_size % height != 0) {
        continue;  // no kw satisfies kh * kw == pooling_size for this kh
      }
      const size_t width = pooling_size / height;
      for (size_t channels = 2; channels < 8; ++channels) {
        tester
          .kh(height)
          .kw(width)
          .kc(channels)
          .x_stride(23)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
      }
    }
  }
}
2702 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, x_scale) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 5x5 window; n in {1, 3, 5}, kc in {1, 4, 7}, and x_scale growing
  // geometrically from 0.01 while it stays below 100.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (float scale = 0.01f; scale < 100.0f; scale *= 3.14159265f) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(5).kw(5).kc(channels)
          .x_scale(scale)
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
      }
    }
  }
}
2721 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, x_zero_point) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 5x5 window; n in {1, 3, 5}, kc in {1, 4, 7}, and x_zero_point stepping
  // through 0, 51, ..., 255.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(5).kw(5).kc(channels)
          .x_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
      }
    }
  }
}
2740 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, y_scale) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 5x5 window; n in {1, 3, 5}, kc in {1, 4, 7}, and y_scale growing
  // geometrically from 0.01 while it stays below 100.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (float scale = 0.01f; scale < 100.0f; scale *= 3.14159265f) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(5).kw(5).kc(channels)
          .y_scale(scale)
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
      }
    }
  }
}
2759 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, y_zero_point) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 5x5 window; n in {1, 3, 5}, kc in {1, 4, 7}, and y_zero_point stepping
  // through 0, 51, ..., 255.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      for (int32_t zero_point = 0; zero_point <= 255; zero_point += 51) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(5).kw(5).kc(channels)
          .y_zero_point(static_cast<uint8_t>(zero_point))
          .iterations(1)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
      }
    }
  }
}
2778 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, qmax) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 5x5 window with both zero points at 128, both scales at 1.0 and qmax at
  // 128; n in {1, 3, 5}, kc in {1, 4, 7}.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      AvgPoolMicrokernelTester()
        .mr(9).qr(8)
        .n(num)
        .kh(5).kw(5).kc(channels)
        .x_zero_point(128).y_zero_point(128)
        .x_scale(1.0f).y_scale(1.0f)
        .qmax(128)
        .iterations(3)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
    }
  }
}
2799 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, qmin) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // 5x5 window with both zero points at 128, both scales at 1.0 and qmin at
  // 128; n in {1, 3, 5}, kc in {1, 4, 7}.
  for (size_t num = 1; num <= 5; num += 2) {
    for (size_t channels = 1; channels < 8; channels += 3) {
      AvgPoolMicrokernelTester()
        .mr(9).qr(8)
        .n(num)
        .kh(5).kw(5).kc(channels)
        .x_zero_point(128).y_zero_point(128)
        .x_scale(1.0f).y_scale(1.0f)
        .qmin(128)
        .iterations(3)
        .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
    }
  }
}
2820 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, small_n) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Square 5x5 and 7x7 windows; n in [2, 4], kc in {1, 4, 7}.
  const std::vector<size_t> window_sides{5, 7};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t channels = 1; channels < 8; channels += 3) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(side).kw(side).kc(channels)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
      }
    }
  }
}
2837 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, small_n_with_x_stride) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Square 5x5 and 7x7 windows with x_stride fixed at 29; n in [2, 4],
  // kc in {1, 4, 7}.
  const std::vector<size_t> window_sides{5, 7};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t channels = 1; channels < 8; channels += 3) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(side).kw(side).kc(channels)
          .x_stride(29)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
      }
    }
  }
}
2855 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, small_n_with_y_stride) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Square 5x5 and 7x7 windows with y_stride fixed at 31; n in [2, 4],
  // kc in {1, 4, 7}.
  const std::vector<size_t> window_sides{5, 7};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t channels = 1; channels < 8; channels += 3) {
        AvgPoolMicrokernelTester()
          .mr(9).qr(8)
          .n(num)
          .kh(side).kw(side).kc(channels)
          .y_stride(31)
          .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
      }
    }
  }
}
2873 
TEST(Q8_AVGPOOL_MP9P8Q__SCALAR, small_n_with_s) {
  const auto variant = AvgPoolMicrokernelTester::Variant::Scalar;
  // Square 5x5 and 7x7 windows; n in [2, 4], s in [2, 5], kc in {1, 4, 7}.
  const std::vector<size_t> window_sides{5, 7};
  for (size_t num = 2; num < 5; ++num) {
    for (const size_t side : window_sides) {
      for (size_t step = 2; step <= 5; ++step) {
        for (size_t channels = 1; channels < 8; channels += 3) {
          AvgPoolMicrokernelTester()
            .mr(9).qr(8)
            .n(num)
            .kh(side).kw(side).kc(channels)
            .s(step)
            .Test(xnn_q8_avgpool_ukernel_mp9p8q__scalar, variant);
        }
      }
    }
  }
}
2893