• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qu8-igemm-minmax-rndnu.yaml
//   Generator: tools/generate-gemm-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20 
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25 
26 
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Runs the 1x8 Cortex-A7 kernel on a full tile (m=1, n=8) with k=8.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
40 
// Full tile (m=1, n=8, k=8) with cn_stride explicitly set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
54 
// k=8 for every n in [1, 8] and m in [1, 1], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
72 
// k=8, n=8 for every m in [1, 1], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
88 
// k=8, m=1 for every n in [1, 8], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
104 
// Full tile (m=1, n=8) for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
119 
// Every k in [1, 7] crossed with n in [1, 8] and m in [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
139 
// Full tile (m=1, n=8) for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
154 
// Every k in [9, 15] crossed with n in [1, 8] and m in [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
174 
// Full tile (m=1, n=8) for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
189 
// k = 16, 24, ..., 80 crossed with n in [1, 8] and m in [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
209 
// m=1 with every n in [9, 15], for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
226 
// m=1 with every n in [9, 15], k = 1, 10, 19, 28, 37, and cn_stride explicitly set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
244 
// Every n in [9, 15] crossed with k = 1, 10, 19, 28, 37 and m in [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
264 
// m=1 with n = 16 and 24 (multiples of 8), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
281 
// m=1 with n = 16 and 24, k = 1, 10, 19, 28, 37, and cn_stride explicitly set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
299 
// n = 16 and 24 crossed with k = 1, 10, 19, 28, 37 and m in [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
319 
// Full tile (m=1, n=8) with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
335 
// ks(3) with k = 1, 10, 19, 28, 37 crossed with n in [1, 8] and m in [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
356 
// ks(3) with m=1, every n in [9, 15], and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
374 
// ks(3) with m=1, n = 16 and 24, and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
392 
// cm_stride(11) with k = 1, 10, 19, 28, 37 crossed with n in [1, 8] and m in [1, 1]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
413 
// Full tile (m=1, n=8) with ks(3) and a_offset(43), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
430 
// Full tile with ks(3) and a_offset(43), sweeping zero_index over [0, 0] for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
450 
// Full tile (m=1, n=8, k=8) with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
464 
// Full tile (m=1, n=8, k=8) with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
478 
// Full tile (m=1, n=8, k=8) with cm_stride explicitly set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
492 
// Full tile (m=1, n=8) with a_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
508 
// Full tile (m=1, n=8) with b_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
524 
// Full tile (m=1, n=8) with both a_zero_point(0) and b_zero_point(0), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
541 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
542 
543 
544 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Runs the 4x8 Cortex-A53 kernel on a full tile (m=4, n=8) with k=8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
557 
// Full tile (m=4, n=8, k=8) with cn_stride explicitly set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
571 
// k=8 for every n in [1, 8] and m in [1, 4], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
589 
// k=8, n=8 for every m in [1, 4], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
605 
// k=8, m=4 for every n in [1, 8], with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
621 
// Full tile (m=4, n=8) for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
636 
// Every k in [1, 7] crossed with n in [1, 8] and m in [1, 4]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
656 
// Full tile (m=4, n=8) for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
671 
// Every k in [9, 15] crossed with n in [1, 8] and m in [1, 4]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
691 
// Full tile (m=4, n=8) for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
706 
// k = 16, 24, ..., 80 crossed with n in [1, 8] and m in [1, 4]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
726 
// m=4 with every n in [9, 15], for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
743 
// m=4 with every n in [9, 15], k = 1, 10, 19, 28, 37, and cn_stride explicitly set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
761 
// Every n in [9, 15] crossed with k = 1, 10, 19, 28, 37 and m in [1, 4]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
781 
// m=4 with n = 16 and 24 (multiples of 8), for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
798 
// m=4 with n = 16 and 24, k = 1, 10, 19, 28, 37, and cn_stride explicitly set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
816 
// n = 16 and 24 crossed with k = 1, 10, 19, 28, 37 and m in [1, 4]; iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
836 
// Full 4x8 tile with ks = 3, for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
852 
// ks = 3 across every m in 1..4 and n in 1..8, for k = 1, 10, ..., 37; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
873 
// n = 9..15 (above nr = 8) with ks = 3, k = 1, 10, ..., 37, and m = 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
891 
// n = 16 and 24 with ks = 3, k = 1, 10, ..., 37, and m = 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
909 
// cm_stride = 11 across every m in 1..4 and n in 1..8, for k = 1, 10, ..., 37; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
930 
// Full 4x8 tile with ks = 3 and a_offset = 163, for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
947 
// Full 4x8 tile with ks = 3 and a_offset = 163, trying zero_index = 0..3 for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
967 
// Full 4x8 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
981 
// Full 4x8 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
995 
// Full 4x8 tile at k = 8 with cm_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1009 
// Full 4x8 tile with a_zero_point = 0, for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1025 
// Full 4x8 tile with b_zero_point = 0, for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1041 
// Full 4x8 tile with both a_zero_point and b_zero_point set to 0, for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1058 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1059 
1060 
1061 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Baseline case: full 4x8 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1074 
// Full 4x8 tile at k = 8 with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1088 
// k = 8 across every n in 1..8 and m in 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1106 
// k = 8 and n = 8 while m varies over 1..4; one iteration per value.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1122 
// k = 8 and m = 4 while n varies over 1..8; one iteration per value.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1138 
// Full 4x8 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1153 
// Every k in 1..7 across every n in 1..8 and m in 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1173 
// Full 4x8 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1188 
// Every k in 9..15 across every n in 1..8 and m in 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1208 
// Full 4x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1223 
// k = 16, 24, ..., 80 across every n in 1..8 and m in 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1243 
// n = 9..15 (above nr = 8) with k = 1, 10, ..., 37 and m = 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1260 
// n = 9..15 with k = 1, 10, ..., 37, m = 4, and cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1278 
// n = 9..15, k = 1, 10, ..., 37, and every m in 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1298 
// n = 16 and 24 (multiples of nr = 8) with k = 1, 10, ..., 37 and m = 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1315 
// n = 16 and 24 with k = 1, 10, ..., 37, m = 4, and cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1333 
// n = 16 and 24, k = 1, 10, ..., 37, and every m in 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1353 
// Full 4x8 tile with ks = 3, for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1369 
// ks = 3 across every m in 1..4 and n in 1..8, for k = 1, 10, ..., 37; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1390 
// n = 9..15 (above nr = 8) with ks = 3, k = 1, 10, ..., 37, and m = 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1408 
// n = 16 and 24 with ks = 3, k = 1, 10, ..., 37, and m = 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1426 
// cm_stride = 11 across every m in 1..4 and n in 1..8, for k = 1, 10, ..., 37; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1447 
// Full 4x8 tile with ks = 3 and a_offset = 163, for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1464 
// Full 4x8 tile with ks = 3 and a_offset = 163, trying zero_index = 0..3 for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1484 
// Full 4x8 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1498 
// Full 4x8 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1512 
// Full 4x8 tile at k = 8 with cm_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1526 
// Full 4x8 tile with a_zero_point = 0, for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1542 
// Full 4x8 tile with b_zero_point = 0, for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1558 
// Full 4x8 tile with both a_zero_point and b_zero_point set to 0, for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
1575 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1576 
1577 
1578 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Baseline case: full 4x16 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1591 
// Full 4x16 tile at k = 8 with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
1605 
// k = 8 across every n in 1..16 and m in 1..4; one iteration per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
1623 
// Varies only m over [1, 4] with n=16 and k=8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(rows).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1639 
// Varies only n over [1, 16] with m=4 and k=8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1655 
// Covers every k below 8 (1 through 7) with m=4 and n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1670 
// For each k in [1, 7], sweeps n over [1, 16] and m over [1, 4],
// running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1690 
// Covers k from 9 through 15 with m=4 and n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1705 
// For each k in [9, 15], sweeps n over [1, 16] and m over [1, 4],
// running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1725 
// Covers multiples of 8 for k, from 16 up to and including 80, with m=4, n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1740 
// For each k in {16, 24, ..., 80}, sweeps n over [1, 16] and m over [1, 4],
// running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1760 
// Covers n from 17 through 31 with m=4; k steps through 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1777 
// Same sweep as n_gt_16 (n in [17, 31], k in {1, 10, 19, 28, 37}, m=4),
// with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1795 
// For n in [17, 31] and k in {1, 10, 19, 28, 37}, sweeps m over [1, 4],
// running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1815 
// Covers n = 32 and n = 48 with m=4; k steps through 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1832 
// Same sweep as n_div_16 (n in {32, 48}, k in {1, 10, 19, 28, 37}, m=4),
// with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1850 
// For n in {32, 48} and k in {1, 10, 19, 28, 37}, sweeps m over [1, 4],
// running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1870 
// Runs m=4, n=16 with ks set to 3 while k steps through 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1886 
// With ks set to 3: for k in {1, 10, 19, 28, 37}, sweeps n over [1, 16] and
// m over [1, 4], running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1907 
// With ks set to 3: n in [17, 31], k in {1, 10, 19, 28, 37}, m=4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1925 
// With ks set to 3: n in {32, 48}, k in {1, 10, 19, 28, 37}, m=4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1943 
// With cm_stride set to 19: for k in {1, 10, 19, 28, 37}, sweeps n over
// [1, 16] and m over [1, 4], running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1964 
// Runs m=4, n=16 with ks=3 and a_offset=163 while k steps through
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1981 
// With ks=3 and a_offset=163: for k in {1, 10, 19, 28, 37}, passes each
// zero_index value from 0 through 3 in turn (m=4, n=16).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
2001 
// Runs m=4, n=16, k=8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
2015 
// Runs m=4, n=16, k=8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
2029 
// Runs m=4, n=16, k=8 with cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
2043 
// Runs m=4, n=16 with a_zero_point forced to 0 while k steps through
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2059 
// Runs m=4, n=16 with b_zero_point forced to 0 while k steps through
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2075 
// Runs m=4, n=16 with both a_zero_point and b_zero_point forced to 0 while
// k steps through 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2092 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2093 
2094 
2095 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the PRFM Cortex-A75 4x16 kernel once with m=4, n=16 and k=8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Configure first, then run, so the setup reads separately from the call.
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
2108 
// Same shape as k_eq_8 (m=4, n=16, k=8) but with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
2122 
// Sweeps every (m, n) pair with n in [1, 16] (outer) and m in [1, 4] (inner)
// at k=8, each configuration run with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
2140 
// Varies only m over [1, 4] with n=16 and k=8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(rows).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2156 
// Varies only n over [1, 16] with m=4 and k=8, using iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2172 
// Covers every k below 8 (1 through 7) with m=4 and n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2187 
// For each k in [1, 7], sweeps n over [1, 16] and m over [1, 4],
// running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2207 
// Covers k from 9 through 15 with m=4 and n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2222 
// For each k in [9, 15], sweeps n over [1, 16] and m over [1, 4],
// running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2242 
// Covers multiples of 8 for k, from 16 up to and including 80, with m=4, n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2257 
// For each k in {16, 24, ..., 80}, sweeps n over [1, 16] and m over [1, 4],
// running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2277 
// Covers n from 17 through 31 with m=4; k steps through 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
2294 
// Same sweep as n_gt_16 (n in [17, 31], k in {1, 10, 19, 28, 37}, m=4),
// with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
2312 
// For n in [17, 31] and k in {1, 10, 19, 28, 37}, sweeps m over [1, 4],
// running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2332 
// Covers n = 32 and n = 48 with m=4; k steps through 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
2349 
// Same sweep as n_div_16 (n in {32, 48}, k in {1, 10, 19, 28, 37}, m=4),
// with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
2367 
// For n in {32, 48} and k in {1, 10, 19, 28, 37}, sweeps m over [1, 4],
// running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2387 
// Runs m=4, n=16 with ks set to 3 while k steps through 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
2403 
// With ks set to 3: for k in {1, 10, 19, 28, 37}, sweeps n over [1, 16] and
// m over [1, 4], running every combination with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2424 
// n_gt_16_small_kernel: ks(3) with m = 4, n in 17..31 (above nr = 16 but below 32),
// and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,n_gt_16_small_kernel)2425   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_small_kernel) {
2426     TEST_REQUIRES_ARM_NEON;
2427     for (uint32_t n = 17; n < 32; n++) {
2428       for (size_t k = 1; k <= 40; k += 9) {
2429         GemmMicrokernelTester()
2430           .mr(4)
2431           .nr(16)
2432           .kr(1)
2433           .sr(1)
2434           .m(4)
2435           .n(n)
2436           .k(k)
2437           .ks(3)
2438           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2439       }
2440     }
2441   }
2442 
// n_div_16_small_kernel: ks(3) with m = 4, n in {32, 48} (multiples of nr = 16),
// and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,n_div_16_small_kernel)2443   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_small_kernel) {
2444     TEST_REQUIRES_ARM_NEON;
2445     for (uint32_t n = 32; n <= 48; n += 16) {
2446       for (size_t k = 1; k <= 40; k += 9) {
2447         GemmMicrokernelTester()
2448           .mr(4)
2449           .nr(16)
2450           .kr(1)
2451           .sr(1)
2452           .m(4)
2453           .n(n)
2454           .k(k)
2455           .ks(3)
2456           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2457       }
2458     }
2459   }
2460 
// strided_cm_subtile: cm_stride(19) with every (m, n) pair for m in 1..4 and n in 1..16,
// sweeping k over 1, 10, 19, 28, 37; each configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,strided_cm_subtile)2461   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cm_subtile) {
2462     TEST_REQUIRES_ARM_NEON;
2463     for (size_t k = 1; k <= 40; k += 9) {
2464       for (uint32_t n = 1; n <= 16; n++) {
2465         for (uint32_t m = 1; m <= 4; m++) {
2466           GemmMicrokernelTester()
2467             .mr(4)
2468             .nr(16)
2469             .kr(1)
2470             .sr(1)
2471             .m(m)
2472             .n(n)
2473             .k(k)
2474             .cm_stride(19)
2475             .iterations(1)
2476             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2477         }
2478       }
2479     }
2480   }
2481 
// a_offset: full 4x16 tile with ks(3) and a_offset(163), sweeping k over 1, 10, 19, 28, 37.
// NOTE(review): 163 is presumably picked by the generator relative to mr * max k — confirm there.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,a_offset)2482   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, a_offset) {
2483     TEST_REQUIRES_ARM_NEON;
2484     for (size_t k = 1; k <= 40; k += 9) {
2485       GemmMicrokernelTester()
2486         .mr(4)
2487         .nr(16)
2488         .kr(1)
2489         .sr(1)
2490         .m(4)
2491         .n(16)
2492         .k(k)
2493         .ks(3)
2494         .a_offset(163)
2495         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2496     }
2497   }
2498 
// zero: same configuration as the a_offset case (ks(3), a_offset(163), k in {1, 10, 19, 28, 37}),
// additionally setting zero_index(mz) for each mz in 0..3 (one value per row of mr = 4).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,zero)2499   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, zero) {
2500     TEST_REQUIRES_ARM_NEON;
2501     for (size_t k = 1; k <= 40; k += 9) {
2502       for (uint32_t mz = 0; mz < 4; mz++) {
2503         GemmMicrokernelTester()
2504           .mr(4)
2505           .nr(16)
2506           .kr(1)
2507           .sr(1)
2508           .m(4)
2509           .n(16)
2510           .k(k)
2511           .ks(3)
2512           .a_offset(163)
2513           .zero_index(mz)
2514           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2515       }
2516     }
2517   }
2518 
// qmin: single run on the full 4x16 tile with k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,qmin)2519   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, qmin) {
2520     TEST_REQUIRES_ARM_NEON;
2521     GemmMicrokernelTester()
2522       .mr(4)
2523       .nr(16)
2524       .kr(1)
2525       .sr(1)
2526       .m(4)
2527       .n(16)
2528       .k(8)
2529       .qmin(128)
2530       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2531   }
2532 
// qmax: single run on the full 4x16 tile with k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,qmax)2533   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, qmax) {
2534     TEST_REQUIRES_ARM_NEON;
2535     GemmMicrokernelTester()
2536       .mr(4)
2537       .nr(16)
2538       .kr(1)
2539       .sr(1)
2540       .m(4)
2541       .n(16)
2542       .k(8)
2543       .qmax(128)
2544       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2545   }
2546 
// strided_cm: single run on the full 4x16 tile with k = 8 and cm_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,strided_cm)2547   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cm) {
2548     TEST_REQUIRES_ARM_NEON;
2549     GemmMicrokernelTester()
2550       .mr(4)
2551       .nr(16)
2552       .kr(1)
2553       .sr(1)
2554       .m(4)
2555       .n(16)
2556       .k(8)
2557       .cm_stride(19)
2558       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2559   }
2560 
// no_a_zero_point: full 4x16 tile with a_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,no_a_zero_point)2561   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_a_zero_point) {
2562     TEST_REQUIRES_ARM_NEON;
2563     for (size_t k = 1; k <= 40; k += 9) {
2564       GemmMicrokernelTester()
2565         .mr(4)
2566         .nr(16)
2567         .kr(1)
2568         .sr(1)
2569         .m(4)
2570         .n(16)
2571         .k(k)
2572         .a_zero_point(0)
2573         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2574     }
2575   }
2576 
// no_b_zero_point: full 4x16 tile with b_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,no_b_zero_point)2577   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_b_zero_point) {
2578     TEST_REQUIRES_ARM_NEON;
2579     for (size_t k = 1; k <= 40; k += 9) {
2580       GemmMicrokernelTester()
2581         .mr(4)
2582         .nr(16)
2583         .kr(1)
2584         .sr(1)
2585         .m(4)
2586         .n(16)
2587         .k(k)
2588         .b_zero_point(0)
2589         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2590     }
2591   }
2592 
// no_zero_point: full 4x16 tile with both a_zero_point(0) and b_zero_point(0),
// sweeping k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,no_zero_point)2593   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_zero_point) {
2594     TEST_REQUIRES_ARM_NEON;
2595     for (size_t k = 1; k <= 40; k += 9) {
2596       GemmMicrokernelTester()
2597         .mr(4)
2598         .nr(16)
2599         .kr(1)
2600         .sr(1)
2601         .m(4)
2602         .n(16)
2603         .k(k)
2604         .a_zero_point(0)
2605         .b_zero_point(0)
2606         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2607     }
2608   }
2609 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2610 
2611 
2612 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_16: after the TEST_REQUIRES_ARM_NEON_DOT check, a single run of the 4x16c4
// neondot_cortex_a55 ukernel on the full tile (m = 4, n = 16) with k = 16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_16)2613   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
2614     TEST_REQUIRES_ARM_NEON_DOT;
2615     GemmMicrokernelTester()
2616       .mr(4)
2617       .nr(16)
2618       .kr(4)
2619       .sr(1)
2620       .m(4)
2621       .n(16)
2622       .k(16)
2623       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2624   }
2625 
// strided_cn: single run on the full 4x16 tile with k = 16 and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,strided_cn)2626   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
2627     TEST_REQUIRES_ARM_NEON_DOT;
2628     GemmMicrokernelTester()
2629       .mr(4)
2630       .nr(16)
2631       .kr(4)
2632       .sr(1)
2633       .m(4)
2634       .n(16)
2635       .k(16)
2636       .cn_stride(19)
2637       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2638   }
2639 
// k_eq_16_subtile: k fixed at 16, every (m, n) pair for m in 1..4 and n in 1..16,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_16_subtile)2640   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
2641     TEST_REQUIRES_ARM_NEON_DOT;
2642     for (uint32_t n = 1; n <= 16; n++) {
2643       for (uint32_t m = 1; m <= 4; m++) {
2644         GemmMicrokernelTester()
2645           .mr(4)
2646           .nr(16)
2647           .kr(4)
2648           .sr(1)
2649           .m(m)
2650           .n(n)
2651           .k(16)
2652           .iterations(1)
2653           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2654       }
2655     }
2656   }
2657 
// k_eq_16_subtile_m: k = 16 and n = 16 fixed, varying only m over 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_16_subtile_m)2658   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
2659     TEST_REQUIRES_ARM_NEON_DOT;
2660     for (uint32_t m = 1; m <= 4; m++) {
2661       GemmMicrokernelTester()
2662         .mr(4)
2663         .nr(16)
2664         .kr(4)
2665         .sr(1)
2666         .m(m)
2667         .n(16)
2668         .k(16)
2669         .iterations(1)
2670         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2671     }
2672   }
2673 
// k_eq_16_subtile_n: k = 16 and m = 4 fixed, varying only n over 1..16, iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_16_subtile_n)2674   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
2675     TEST_REQUIRES_ARM_NEON_DOT;
2676     for (uint32_t n = 1; n <= 16; n++) {
2677       GemmMicrokernelTester()
2678         .mr(4)
2679         .nr(16)
2680         .kr(4)
2681         .sr(1)
2682         .m(4)
2683         .n(n)
2684         .k(16)
2685         .iterations(1)
2686         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2687     }
2688   }
2689 
// k_lt_16: full 4x16 tile with every k in 1..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_lt_16)2690   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
2691     TEST_REQUIRES_ARM_NEON_DOT;
2692     for (size_t k = 1; k < 16; k++) {
2693       GemmMicrokernelTester()
2694         .mr(4)
2695         .nr(16)
2696         .kr(4)
2697         .sr(1)
2698         .m(4)
2699         .n(16)
2700         .k(k)
2701         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2702     }
2703   }
2704 
// k_lt_16_subtile: every k in 1..15 crossed with every (m, n) pair for m in 1..4
// and n in 1..16, with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_lt_16_subtile)2705   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
2706     TEST_REQUIRES_ARM_NEON_DOT;
2707     for (size_t k = 1; k < 16; k++) {
2708       for (uint32_t n = 1; n <= 16; n++) {
2709         for (uint32_t m = 1; m <= 4; m++) {
2710           GemmMicrokernelTester()
2711             .mr(4)
2712             .nr(16)
2713             .kr(4)
2714             .sr(1)
2715             .m(m)
2716             .n(n)
2717             .k(k)
2718             .iterations(1)
2719             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2720         }
2721       }
2722     }
2723   }
2724 
// k_gt_16: full 4x16 tile with every k in 17..31.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_gt_16)2725   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
2726     TEST_REQUIRES_ARM_NEON_DOT;
2727     for (size_t k = 17; k < 32; k++) {
2728       GemmMicrokernelTester()
2729         .mr(4)
2730         .nr(16)
2731         .kr(4)
2732         .sr(1)
2733         .m(4)
2734         .n(16)
2735         .k(k)
2736         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2737     }
2738   }
2739 
// k_gt_16_subtile: every k in 17..31 crossed with every (m, n) pair for m in 1..4
// and n in 1..16, with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_gt_16_subtile)2740   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
2741     TEST_REQUIRES_ARM_NEON_DOT;
2742     for (size_t k = 17; k < 32; k++) {
2743       for (uint32_t n = 1; n <= 16; n++) {
2744         for (uint32_t m = 1; m <= 4; m++) {
2745           GemmMicrokernelTester()
2746             .mr(4)
2747             .nr(16)
2748             .kr(4)
2749             .sr(1)
2750             .m(m)
2751             .n(n)
2752             .k(k)
2753             .iterations(1)
2754             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2755         }
2756       }
2757     }
2758   }
2759 
// k_div_16: full 4x16 tile with k stepping through the multiples of 16 from 32 to 160.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_div_16)2760   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
2761     TEST_REQUIRES_ARM_NEON_DOT;
2762     for (size_t k = 32; k <= 160; k += 16) {
2763       GemmMicrokernelTester()
2764         .mr(4)
2765         .nr(16)
2766         .kr(4)
2767         .sr(1)
2768         .m(4)
2769         .n(16)
2770         .k(k)
2771         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2772     }
2773   }
2774 
// k_div_16_subtile: k over the multiples of 16 from 32 to 160, crossed with every
// (m, n) pair for m in 1..4 and n in 1..16, with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_div_16_subtile)2775   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
2776     TEST_REQUIRES_ARM_NEON_DOT;
2777     for (size_t k = 32; k <= 160; k += 16) {
2778       for (uint32_t n = 1; n <= 16; n++) {
2779         for (uint32_t m = 1; m <= 4; m++) {
2780           GemmMicrokernelTester()
2781             .mr(4)
2782             .nr(16)
2783             .kr(4)
2784             .sr(1)
2785             .m(m)
2786             .n(n)
2787             .k(k)
2788             .iterations(1)
2789             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2790         }
2791       }
2792     }
2793   }
2794 
// n_gt_16: m = 4 with n in 17..31 (above nr = 16 but below 32) and k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,n_gt_16)2795   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
2796     TEST_REQUIRES_ARM_NEON_DOT;
2797     for (uint32_t n = 17; n < 32; n++) {
2798       for (size_t k = 1; k <= 80; k += 17) {
2799         GemmMicrokernelTester()
2800           .mr(4)
2801           .nr(16)
2802           .kr(4)
2803           .sr(1)
2804           .m(4)
2805           .n(n)
2806           .k(k)
2807           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2808       }
2809     }
2810   }
2811 
// n_gt_16_strided_cn: m = 4 with n in 17..31 and k in {1, 18, 35, 52, 69},
// additionally setting cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,n_gt_16_strided_cn)2812   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
2813     TEST_REQUIRES_ARM_NEON_DOT;
2814     for (uint32_t n = 17; n < 32; n++) {
2815       for (size_t k = 1; k <= 80; k += 17) {
2816         GemmMicrokernelTester()
2817           .mr(4)
2818           .nr(16)
2819           .kr(4)
2820           .sr(1)
2821           .m(4)
2822           .n(n)
2823           .k(k)
2824           .cn_stride(19)
2825           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2826       }
2827     }
2828   }
2829 
// n_gt_16_subtile: n in 17..31 and k in {1, 18, 35, 52, 69} for every m in 1..4,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,n_gt_16_subtile)2830   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
2831     TEST_REQUIRES_ARM_NEON_DOT;
2832     for (uint32_t n = 17; n < 32; n++) {
2833       for (size_t k = 1; k <= 80; k += 17) {
2834         for (uint32_t m = 1; m <= 4; m++) {
2835           GemmMicrokernelTester()
2836             .mr(4)
2837             .nr(16)
2838             .kr(4)
2839             .sr(1)
2840             .m(m)
2841             .n(n)
2842             .k(k)
2843             .iterations(1)
2844             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2845         }
2846       }
2847     }
2848   }
2849 
// n_div_16: m = 4 with n in {32, 48} (multiples of nr = 16) and k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,n_div_16)2850   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
2851     TEST_REQUIRES_ARM_NEON_DOT;
2852     for (uint32_t n = 32; n <= 48; n += 16) {
2853       for (size_t k = 1; k <= 80; k += 17) {
2854         GemmMicrokernelTester()
2855           .mr(4)
2856           .nr(16)
2857           .kr(4)
2858           .sr(1)
2859           .m(4)
2860           .n(n)
2861           .k(k)
2862           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2863       }
2864     }
2865   }
2866 
// n_div_16_strided_cn: m = 4 with n in {32, 48} and k in {1, 18, 35, 52, 69},
// additionally setting cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,n_div_16_strided_cn)2867   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
2868     TEST_REQUIRES_ARM_NEON_DOT;
2869     for (uint32_t n = 32; n <= 48; n += 16) {
2870       for (size_t k = 1; k <= 80; k += 17) {
2871         GemmMicrokernelTester()
2872           .mr(4)
2873           .nr(16)
2874           .kr(4)
2875           .sr(1)
2876           .m(4)
2877           .n(n)
2878           .k(k)
2879           .cn_stride(19)
2880           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2881       }
2882     }
2883   }
2884 
// n_div_16_subtile: n in {32, 48} and k in {1, 18, 35, 52, 69} for every m in 1..4,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,n_div_16_subtile)2885   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
2886     TEST_REQUIRES_ARM_NEON_DOT;
2887     for (uint32_t n = 32; n <= 48; n += 16) {
2888       for (size_t k = 1; k <= 80; k += 17) {
2889         for (uint32_t m = 1; m <= 4; m++) {
2890           GemmMicrokernelTester()
2891             .mr(4)
2892             .nr(16)
2893             .kr(4)
2894             .sr(1)
2895             .m(m)
2896             .n(n)
2897             .k(k)
2898             .iterations(1)
2899             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2900         }
2901       }
2902     }
2903   }
2904 
// small_kernel: full 4x16 tile with ks(3), sweeping k over 1, 18, 35, 52, 69.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,small_kernel)2905   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
2906     TEST_REQUIRES_ARM_NEON_DOT;
2907     for (size_t k = 1; k <= 80; k += 17) {
2908       GemmMicrokernelTester()
2909         .mr(4)
2910         .nr(16)
2911         .kr(4)
2912         .sr(1)
2913         .m(4)
2914         .n(16)
2915         .k(k)
2916         .ks(3)
2917         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2918     }
2919   }
2920 
// small_kernel_subtile: ks(3) with every (m, n) pair for m in 1..4 and n in 1..16,
// sweeping k over 1, 18, 35, 52, 69; each configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,small_kernel_subtile)2921   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
2922     TEST_REQUIRES_ARM_NEON_DOT;
2923     for (size_t k = 1; k <= 80; k += 17) {
2924       for (uint32_t n = 1; n <= 16; n++) {
2925         for (uint32_t m = 1; m <= 4; m++) {
2926           GemmMicrokernelTester()
2927             .mr(4)
2928             .nr(16)
2929             .kr(4)
2930             .sr(1)
2931             .m(m)
2932             .n(n)
2933             .k(k)
2934             .ks(3)
2935             .iterations(1)
2936             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2937         }
2938       }
2939     }
2940   }
2941 
// n_gt_16_small_kernel: ks(3) with m = 4, n in 17..31 and k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,n_gt_16_small_kernel)2942   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
2943     TEST_REQUIRES_ARM_NEON_DOT;
2944     for (uint32_t n = 17; n < 32; n++) {
2945       for (size_t k = 1; k <= 80; k += 17) {
2946         GemmMicrokernelTester()
2947           .mr(4)
2948           .nr(16)
2949           .kr(4)
2950           .sr(1)
2951           .m(4)
2952           .n(n)
2953           .k(k)
2954           .ks(3)
2955           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2956       }
2957     }
2958   }
2959 
// n_div_16_small_kernel: ks(3) with m = 4, n in {32, 48} and k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,n_div_16_small_kernel)2960   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
2961     TEST_REQUIRES_ARM_NEON_DOT;
2962     for (uint32_t n = 32; n <= 48; n += 16) {
2963       for (size_t k = 1; k <= 80; k += 17) {
2964         GemmMicrokernelTester()
2965           .mr(4)
2966           .nr(16)
2967           .kr(4)
2968           .sr(1)
2969           .m(4)
2970           .n(n)
2971           .k(k)
2972           .ks(3)
2973           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2974       }
2975     }
2976   }
2977 
// strided_cm_subtile: cm_stride(19) with every (m, n) pair for m in 1..4 and n in 1..16,
// sweeping k over 1, 18, 35, 52, 69; each configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,strided_cm_subtile)2978   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
2979     TEST_REQUIRES_ARM_NEON_DOT;
2980     for (size_t k = 1; k <= 80; k += 17) {
2981       for (uint32_t n = 1; n <= 16; n++) {
2982         for (uint32_t m = 1; m <= 4; m++) {
2983           GemmMicrokernelTester()
2984             .mr(4)
2985             .nr(16)
2986             .kr(4)
2987             .sr(1)
2988             .m(m)
2989             .n(n)
2990             .k(k)
2991             .cm_stride(19)
2992             .iterations(1)
2993             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
2994         }
2995       }
2996     }
2997   }
2998 
// a_offset: full 4x16 tile with ks(3) and a_offset(331), sweeping k over 1, 18, 35, 52, 69.
// NOTE(review): 331 is presumably picked by the generator relative to mr * max k — confirm there.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,a_offset)2999   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
3000     TEST_REQUIRES_ARM_NEON_DOT;
3001     for (size_t k = 1; k <= 80; k += 17) {
3002       GemmMicrokernelTester()
3003         .mr(4)
3004         .nr(16)
3005         .kr(4)
3006         .sr(1)
3007         .m(4)
3008         .n(16)
3009         .k(k)
3010         .ks(3)
3011         .a_offset(331)
3012         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3013     }
3014   }
3015 
// zero: same configuration as the a_offset case (ks(3), a_offset(331), k in {1, 18, 35, 52, 69}),
// additionally setting zero_index(mz) for each mz in 0..3 (one value per row of mr = 4).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,zero)3016   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
3017     TEST_REQUIRES_ARM_NEON_DOT;
3018     for (size_t k = 1; k <= 80; k += 17) {
3019       for (uint32_t mz = 0; mz < 4; mz++) {
3020         GemmMicrokernelTester()
3021           .mr(4)
3022           .nr(16)
3023           .kr(4)
3024           .sr(1)
3025           .m(4)
3026           .n(16)
3027           .k(k)
3028           .ks(3)
3029           .a_offset(331)
3030           .zero_index(mz)
3031           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3032       }
3033     }
3034   }
3035 
// qmin: single run on the full 4x16 tile with k = 16 and qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,qmin)3036   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
3037     TEST_REQUIRES_ARM_NEON_DOT;
3038     GemmMicrokernelTester()
3039       .mr(4)
3040       .nr(16)
3041       .kr(4)
3042       .sr(1)
3043       .m(4)
3044       .n(16)
3045       .k(16)
3046       .qmin(128)
3047       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3048   }
3049 
// qmax: single run on the full 4x16 tile with k = 16 and qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,qmax)3050   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
3051     TEST_REQUIRES_ARM_NEON_DOT;
3052     GemmMicrokernelTester()
3053       .mr(4)
3054       .nr(16)
3055       .kr(4)
3056       .sr(1)
3057       .m(4)
3058       .n(16)
3059       .k(16)
3060       .qmax(128)
3061       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3062   }
3063 
// strided_cm: single run on the full 4x16 tile with k = 16 and cm_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,strided_cm)3064   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
3065     TEST_REQUIRES_ARM_NEON_DOT;
3066     GemmMicrokernelTester()
3067       .mr(4)
3068       .nr(16)
3069       .kr(4)
3070       .sr(1)
3071       .m(4)
3072       .n(16)
3073       .k(16)
3074       .cm_stride(19)
3075       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3076   }
3077 
// no_a_zero_point: full 4x16 tile with a_zero_point(0), sweeping k over 1, 18, 35, 52, 69.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,no_a_zero_point)3078   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_a_zero_point) {
3079     TEST_REQUIRES_ARM_NEON_DOT;
3080     for (size_t k = 1; k <= 80; k += 17) {
3081       GemmMicrokernelTester()
3082         .mr(4)
3083         .nr(16)
3084         .kr(4)
3085         .sr(1)
3086         .m(4)
3087         .n(16)
3088         .k(k)
3089         .a_zero_point(0)
3090         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3091     }
3092   }
3093 
// no_b_zero_point: full 4x16 tile with b_zero_point(0), sweeping k over 1, 18, 35, 52, 69.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,no_b_zero_point)3094   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_b_zero_point) {
3095     TEST_REQUIRES_ARM_NEON_DOT;
3096     for (size_t k = 1; k <= 80; k += 17) {
3097       GemmMicrokernelTester()
3098         .mr(4)
3099         .nr(16)
3100         .kr(4)
3101         .sr(1)
3102         .m(4)
3103         .n(16)
3104         .k(k)
3105         .b_zero_point(0)
3106         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3107     }
3108   }
3109 
// no_zero_point: full 4x16 tile with both a_zero_point(0) and b_zero_point(0),
// sweeping k over 1, 18, 35, 52, 69.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,no_zero_point)3110   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_zero_point) {
3111     TEST_REQUIRES_ARM_NEON_DOT;
3112     for (size_t k = 1; k <= 80; k += 17) {
3113       GemmMicrokernelTester()
3114         .mr(4)
3115         .nr(16)
3116         .kr(4)
3117         .sr(1)
3118         .m(4)
3119         .n(16)
3120         .k(k)
3121         .a_zero_point(0)
3122         .b_zero_point(0)
3123         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3124     }
3125   }
3126 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3127 
3128 
3129 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_16: after the TEST_REQUIRES_ARM_NEON_DOT check, a single run of the 4x16c4
// neondot_ld128 ukernel on the full tile (m = 4, n = 16) with k = 16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16)3130   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) {
3131     TEST_REQUIRES_ARM_NEON_DOT;
3132     GemmMicrokernelTester()
3133       .mr(4)
3134       .nr(16)
3135       .kr(4)
3136       .sr(1)
3137       .m(4)
3138       .n(16)
3139       .k(16)
3140       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3141   }
3142 
// strided_cn: single run on the full 4x16 tile with k = 16 and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,strided_cn)3143   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) {
3144     TEST_REQUIRES_ARM_NEON_DOT;
3145     GemmMicrokernelTester()
3146       .mr(4)
3147       .nr(16)
3148       .kr(4)
3149       .sr(1)
3150       .m(4)
3151       .n(16)
3152       .k(16)
3153       .cn_stride(19)
3154       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3155   }
3156 
// k_eq_16_subtile: k fixed at 16, every (m, n) pair for m in 1..4 and n in 1..16,
// with iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile)3157   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
3158     TEST_REQUIRES_ARM_NEON_DOT;
3159     for (uint32_t n = 1; n <= 16; n++) {
3160       for (uint32_t m = 1; m <= 4; m++) {
3161         GemmMicrokernelTester()
3162           .mr(4)
3163           .nr(16)
3164           .kr(4)
3165           .sr(1)
3166           .m(m)
3167           .n(n)
3168           .k(16)
3169           .iterations(1)
3170           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3171       }
3172     }
3173   }
3174 
// k_eq_16_subtile_m: k = 16 and n = 16 fixed, varying only m over 1..4, iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile_m)3175   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
3176     TEST_REQUIRES_ARM_NEON_DOT;
3177     for (uint32_t m = 1; m <= 4; m++) {
3178       GemmMicrokernelTester()
3179         .mr(4)
3180         .nr(16)
3181         .kr(4)
3182         .sr(1)
3183         .m(m)
3184         .n(16)
3185         .k(16)
3186         .iterations(1)
3187         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3188     }
3189   }
3190 
// k_eq_16_subtile_n: k = 16 and m = 4 fixed, varying only n over 1..16, iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile_n)3191   TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
3192     TEST_REQUIRES_ARM_NEON_DOT;
3193     for (uint32_t n = 1; n <= 16; n++) {
3194       GemmMicrokernelTester()
3195         .mr(4)
3196         .nr(16)
3197         .kr(4)
3198         .sr(1)
3199         .m(4)
3200         .n(n)
3201         .k(16)
3202         .iterations(1)
3203         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
3204     }
3205   }
3206 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Full 4x16 tile with every k in [1, 15].
  for (size_t k_size = 1; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3221 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // For each k in [1, 15], runs every (m, n) pair with n in [1, 16] and
  // m in [1, 4]; one iteration per configuration.
  for (size_t k_size = 1; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3241 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Full 4x16 tile with every k in [17, 31].
  for (size_t k_size = 17; k_size <= 31; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3256 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // For each k in [17, 31], runs every (m, n) pair with n in [1, 16] and
  // m in [1, 4]; one iteration per configuration.
  for (size_t k_size = 17; k_size <= 31; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3276 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Full 4x16 tile with k = 32, 48, ..., 160 (multiples of 16).
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3291 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // For k = 32, 48, ..., 160, runs every (m, n) pair with n in [1, 16] and
  // m in [1, 4]; one iteration per configuration.
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3311 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in [17, 31] with m = 4; k takes the values 1, 18, 35, 52, 69.
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3328 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in [17, 31] with m = 4 and cn_stride = 19; k takes 1, 18, 35, 52, 69.
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3346 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in [17, 31], k in {1, 18, 35, 52, 69}, m in [1, 4]; one iteration per
  // configuration.
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3366 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n = 32 and 48 with m = 4; k takes the values 1, 18, 35, 52, 69.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3383 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n = 32 and 48 with m = 4 and cn_stride = 19; k takes 1, 18, 35, 52, 69.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3401 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in {32, 48}, k in {1, 18, 35, 52, 69}, m in [1, 4]; one iteration per
  // configuration.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3421 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Full 4x16 tile with ks = 3; k takes the values 1, 18, 35, 52, 69.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3437 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // ks = 3 with k in {1, 18, 35, 52, 69}, n in [1, 16], m in [1, 4]; one
  // iteration per configuration.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3458 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in [17, 31] with m = 4 and ks = 3; k takes 1, 18, 35, 52, 69.
  for (uint32_t n_size = 17; n_size <= 31; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3476 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n = 32 and 48 with m = 4 and ks = 3; k takes 1, 18, 35, 52, 69.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3494 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // cm_stride = 19 with k in {1, 18, 35, 52, 69}, n in [1, 16], m in [1, 4];
  // one iteration per configuration.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3515 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Full 4x16 tile with ks = 3 and a_offset = 331; k takes 1, 18, 35, 52, 69.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .a_offset(331)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3532 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Full 4x16 tile with ks = 3 and a_offset = 331; for each k in
  // {1, 18, 35, 52, 69} the zero_index is swept over 0..3.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_row = 0; zero_row <= 3; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .a_offset(331)
        .zero_index(zero_row)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3552 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single full-tile run (m = 4, n = 16, k = 16) with qmin set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
3566 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single full-tile run (m = 4, n = 16, k = 16) with qmax set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
3580 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single full-tile run (m = 4, n = 16, k = 16) with cm_stride set to 19.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
3594 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Full 4x16 tile with a_zero_point = 0; k takes 1, 18, 35, 52, 69.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3610 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Full 4x16 tile with b_zero_point = 0; k takes 1, 18, 35, 52, 69.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3626 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Full 4x16 tile with both a_zero_point and b_zero_point set to 0;
  // k takes 1, 18, 35, 52, 69.
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3643 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3644 
3645 
3646 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single full-tile run: m = 1, n = 8, k = 8.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
3659 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single full-tile run (m = 1, n = 8, k = 8) with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
3673 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Runs every (m, n) pair with n in [1, 8] and m in [1, 1] at k = 8,
  // a single iteration per configuration.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3691 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // Varies m over [1, 1] while n = 8 and k = 8 stay fixed; one iteration each.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3707 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // Varies n over [1, 8] while m = 1 and k = 8 stay fixed; one iteration each.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3723 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile with every k in [1, 7].
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3738 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For each k in [1, 7], runs every (m, n) pair with n in [1, 8] and
  // m in [1, 1]; one iteration per configuration.
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3758 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile with every k in [9, 15].
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3773 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For each k in [9, 15], runs every (m, n) pair with n in [1, 8] and
  // m in [1, 1]; one iteration per configuration.
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3793 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile with k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3808 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For k = 16, 24, ..., 80, runs every (m, n) pair with n in [1, 8] and
  // m in [1, 1]; one iteration per configuration.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3828 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15] with m = 1; k takes the values 1, 10, 19, 28, 37.
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3845 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15] with m = 1 and cn_stride = 11; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3863 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15], k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration per
  // configuration.
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3883 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24 with m = 1; k takes the values 1, 10, 19, 28, 37.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3900 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24 with m = 1 and cn_stride = 11; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3918 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24}, k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration per
  // configuration.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3938 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile with ks = 3; k takes the values 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
3954 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 with k in {1, 10, 19, 28, 37}, n in [1, 8], m in [1, 1]; one
  // iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3975 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15] with m = 1 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_size = 9; n_size <= 15; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
3993 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n = 16 and 24 with m = 1 and ks = 3; k takes 1, 10, 19, 28, 37.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4011 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride = 11 with k in {1, 10, 19, 28, 37}, n in [1, 8], m in [1, 1];
  // one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4032 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Full 1x8 tile with ks = 3 and a_offset = 43; k takes 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4049 
// Runs the 1x8 NEON MLAL-lane kernel with ks = 3, a_offset = 43 and
// zero_index = 0 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t mz_idx = 0; mz_idx < 1; ++mz_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(43)
        .zero_index(mz_idx)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4069 
// Runs the 1x8 NEON MLAL-lane kernel once at m = 1, n = 8, k = 8 with
// qmin = 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
4083 
// Runs the 1x8 NEON MLAL-lane kernel once at m = 1, n = 8, k = 8 with
// qmax = 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
4097 
// Runs the 1x8 NEON MLAL-lane kernel once at m = 1, n = 8, k = 8 with
// cm_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
4111 
// Runs the 1x8 NEON MLAL-lane kernel with a_zero_point = 0 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4127 
// Runs the 1x8 NEON MLAL-lane kernel with b_zero_point = 0 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4143 
// Runs the 1x8 NEON MLAL-lane kernel with both a_zero_point and
// b_zero_point set to 0 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4160 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4161 
4162 
4163 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Runs the 1x8c4 NEON dot-product kernel once at m = 1, n = 8, k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
4176 
// Runs the 1x8c4 NEON dot-product kernel once at m = 1, n = 8, k = 8 with
// cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
4190 
// Runs the 1x8c4 NEON dot-product kernel at k = 8 with a single iteration
// for n in 1..8 and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4208 
// Runs the 1x8c4 NEON dot-product kernel at n = 8, k = 8 with a single
// iteration for m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_dim).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4224 
// Runs the 1x8c4 NEON dot-product kernel at m = 1, k = 8 with a single
// iteration for n in 1..8.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4240 
// Runs the 1x8c4 NEON dot-product kernel at m = 1, n = 8 for k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4255 
// Runs the 1x8c4 NEON dot-product kernel with a single iteration for
// k in 1..7, n in 1..8 and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4275 
// Runs the 1x8c4 NEON dot-product kernel at m = 1, n = 8 for k in 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4290 
// Runs the 1x8c4 NEON dot-product kernel with a single iteration for
// k in 9..15, n in 1..8 and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4310 
// Runs the 1x8c4 NEON dot-product kernel at m = 1, n = 8 for
// k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4325 
// Runs the 1x8c4 NEON dot-product kernel with a single iteration for
// k = 16, 24, ..., 80, n in 1..8 and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4345 
// Runs the 1x8c4 NEON dot-product kernel at m = 1 for n in 9..15 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4362 
// Runs the 1x8c4 NEON dot-product kernel at m = 1 with cn_stride = 11 for
// n in 9..15 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4380 
// Runs the 1x8c4 NEON dot-product kernel with a single iteration for
// n in 9..15, k in {1, 10, 19, 28, 37} and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4400 
// Runs the 1x8c4 NEON dot-product kernel at m = 1 for n in {16, 24} and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4417 
// Runs the 1x8c4 NEON dot-product kernel at m = 1 with cn_stride = 11 for
// n in {16, 24} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4435 
// Runs the 1x8c4 NEON dot-product kernel with a single iteration for
// n in {16, 24}, k in {1, 10, 19, 28, 37} and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4455 
// Runs the 1x8c4 NEON dot-product kernel at m = 1, n = 8 with ks = 3 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4471 
// Runs the 1x8c4 NEON dot-product kernel with ks = 3 and a single
// iteration for k in {1, 10, 19, 28, 37}, n in 1..8 and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4492 
// Runs the 1x8c4 NEON dot-product kernel at m = 1 with ks = 3 for
// n in 9..15 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4510 
// Runs the 1x8c4 NEON dot-product kernel at m = 1 with ks = 3 for
// n in {16, 24} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4528 
// Runs the 1x8c4 NEON dot-product kernel with cm_stride = 11 and a single
// iteration for k in {1, 10, 19, 28, 37}, n in 1..8 and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4549 
// Runs the 1x8c4 NEON dot-product kernel at m = 1, n = 8 with ks = 3 and
// a_offset = 43 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4566 
// Runs the 1x8c4 NEON dot-product kernel at m = 1, n = 8 with ks = 3,
// a_offset = 43 and zero_index = 0 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t mz_idx = 0; mz_idx < 1; ++mz_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(43)
        .zero_index(mz_idx)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4586 
// Runs the 1x8c4 NEON dot-product kernel once at m = 1, n = 8, k = 8 with
// qmin = 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
4600 
// Runs the 1x8c4 NEON dot-product kernel once at m = 1, n = 8, k = 8 with
// qmax = 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
4614 
// Runs the 1x8c4 NEON dot-product kernel once at m = 1, n = 8, k = 8 with
// cm_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
4628 
// Runs the 1x8c4 NEON dot-product kernel at m = 1, n = 8 with
// a_zero_point = 0 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4644 
// Runs the 1x8c4 NEON dot-product kernel at m = 1, n = 8 with
// b_zero_point = 0 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4660 
// Runs the 1x8c4 NEON dot-product kernel at m = 1, n = 8 with both
// a_zero_point and b_zero_point set to 0 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4677 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
4678 
4679 
4680 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 1x16 NEON MLAL-lane kernel once at m = 1, n = 16, k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
4693 
// Runs the 1x16 NEON MLAL-lane kernel once at m = 1, n = 16, k = 8 with
// cn_stride = 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
4707 
// Runs the 1x16 NEON MLAL-lane kernel at k = 8 with a single iteration
// for n in 1..16 and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4725 
// Runs the 1x16 NEON MLAL-lane kernel at n = 16, k = 8 with a single
// iteration for m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4741 
// Runs the 1x16 NEON MLAL-lane kernel at m = 1, k = 8 with a single
// iteration for n in 1..16.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4757 
// Runs the 1x16 NEON MLAL-lane kernel at m = 1, n = 16 for k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4772 
// Runs the 1x16 NEON MLAL-lane kernel with a single iteration for
// k in 1..7, n in 1..16 and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4792 
// Runs the 1x16 NEON MLAL-lane kernel at m = 1, n = 16 for k in 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4807 
// Runs the 1x16 NEON MLAL-lane kernel with a single iteration for
// k in 9..15, n in 1..16 and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4827 
// Runs the 1x16 NEON MLAL-lane kernel at m = 1, n = 16 for
// k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_dim)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4842 
// Runs the 1x16 NEON MLAL-lane kernel with a single iteration for
// k = 16, 24, ..., 80, n in 1..16 and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4862 
// Runs the 1x16 NEON MLAL-lane kernel at m = 1 for n in 17..31 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4879 
// Runs the 1x16 NEON MLAL-lane kernel at m = 1 with cn_stride = 19 for
// n in 17..31 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4897 
// Runs the 1x16 NEON MLAL-lane kernel with a single iteration for
// n in 17..31, k in {1, 10, 19, 28, 37} and m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4917 
// Runs the 1x16 NEON MLAL-lane kernel at m = 1 for n in {32, 48} and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4934 
// 1x16 NEON MLAL-lane kernel: n in {32, 48}, k in {1, 10, 19, 28, 37},
// with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
4952 
// 1x16 NEON MLAL-lane kernel: n in {32, 48}, k in {1, 10, 19, 28, 37},
// m limited to 1, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4972 
// 1x16 NEON MLAL-lane kernel: full 1x16 tile with ks set to 3,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(cur_k)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
4988 
// 1x16 NEON MLAL-lane kernel: ks set to 3, k in {1, 10, 19, 28, 37},
// n from 1 to 16, m limited to 1, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5009 
// 1x16 NEON MLAL-lane kernel: ks set to 3, n from 17 to 31,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5027 
// 1x16 NEON MLAL-lane kernel: ks set to 3, n in {32, 48},
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5045 
// 1x16 NEON MLAL-lane kernel: cm_stride set to 19, k in {1, 10, 19, 28, 37},
// n from 1 to 16, m limited to 1, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5066 
// 1x16 NEON MLAL-lane kernel: ks set to 3 and a_offset set to 43,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(cur_k)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5083 
// 1x16 NEON MLAL-lane kernel: ks set to 3, a_offset set to 43, and
// zero_index taking the single value 0; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 1; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(cur_k)
        .ks(3)
        .a_offset(43)
        .zero_index(cur_mz)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5103 
// 1x16 NEON MLAL-lane kernel: full 1x16 tile, k of 8, qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5117 
// 1x16 NEON MLAL-lane kernel: full 1x16 tile, k of 8, qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5131 
// 1x16 NEON MLAL-lane kernel: full 1x16 tile, k of 8, cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5145 
// 1x16 NEON MLAL-lane kernel: a_zero_point set to 0,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(cur_k)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5161 
// 1x16 NEON MLAL-lane kernel: b_zero_point set to 0,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(cur_k)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5177 
// 1x16 NEON MLAL-lane kernel: both a_zero_point and b_zero_point set to 0,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(cur_k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5194 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5195 
5196 
5197 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x16 NEON MLAL-lane kernel: full 2x16 tile with k of 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5210 
// 2x16 NEON MLAL-lane kernel: full 2x16 tile, k of 8, cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5224 
// 2x16 NEON MLAL-lane kernel: k of 8, n from 1 to 16, m from 1 to 2,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5242 
// 2x16 NEON MLAL-lane kernel: k of 8, n fixed at 16, m from 1 to 2,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(cur_m).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5258 
// 2x16 NEON MLAL-lane kernel: k of 8, m fixed at 2, n from 1 to 16,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5274 
// 2x16 NEON MLAL-lane kernel: full 2x16 tile, k from 1 to 7.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(cur_k)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5289 
// 2x16 NEON MLAL-lane kernel: k from 1 to 7, n from 1 to 16, m from 1 to 2,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5309 
// 2x16 NEON MLAL-lane kernel: full 2x16 tile, k from 9 to 15.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(cur_k)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5324 
// 2x16 NEON MLAL-lane kernel: k from 9 to 15, n from 1 to 16, m from 1 to 2,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5344 
// 2x16 NEON MLAL-lane kernel: full 2x16 tile, k from 16 to 80 in steps of 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(cur_k)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5359 
// 2x16 NEON MLAL-lane kernel: k from 16 to 80 in steps of 8, n from 1 to 16,
// m from 1 to 2, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5379 
// 2x16 NEON MLAL-lane kernel: n from 17 to 31, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5396 
// 2x16 NEON MLAL-lane kernel: n from 17 to 31, k in {1, 10, 19, 28, 37},
// with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5414 
// 2x16 NEON MLAL-lane kernel: n from 17 to 31, k in {1, 10, 19, 28, 37},
// m from 1 to 2, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5434 
// 2x16 NEON MLAL-lane kernel: n in {32, 48}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5451 
// 2x16 NEON MLAL-lane kernel: n in {32, 48}, k in {1, 10, 19, 28, 37},
// with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5469 
// 2x16 NEON MLAL-lane kernel: n in {32, 48}, k in {1, 10, 19, 28, 37},
// m from 1 to 2, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5489 
// 2x16 NEON MLAL-lane kernel: full 2x16 tile with ks set to 3,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(cur_k)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5505 
// 2x16 NEON MLAL-lane kernel: ks set to 3, k in {1, 10, 19, 28, 37},
// n from 1 to 16, m from 1 to 2, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5526 
// 2x16 NEON MLAL-lane kernel: ks set to 3, n from 17 to 31,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5544 
// 2x16 NEON MLAL-lane kernel: ks set to 3, n in {32, 48},
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5562 
// 2x16 NEON MLAL-lane kernel: cm_stride set to 19, k in {1, 10, 19, 28, 37},
// n from 1 to 16, m from 1 to 2, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5583 
// 2x16 NEON MLAL-lane kernel: ks set to 3 and a_offset set to 83,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(cur_k)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5600 
// 2x16 NEON MLAL-lane kernel: ks set to 3, a_offset set to 83, and
// zero_index taking the values 0 and 1; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 2; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(16).k(cur_k)
        .ks(3)
        .a_offset(83)
        .zero_index(cur_mz)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5620 
// 2x16 NEON MLAL-lane kernel: full 2x16 tile, k of 8, qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5634 
// 2x16 NEON MLAL-lane kernel: full 2x16 tile, k of 8, qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5648 
// 2x16 NEON MLAL-lane kernel: full 2x16 tile, k of 8, cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5662 
// 2x16 NEON MLAL-lane kernel: a_zero_point set to 0,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(cur_k)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5678 
// 2x16 NEON MLAL-lane kernel: b_zero_point set to 0,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(cur_k)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5694 
// 2x16 NEON MLAL-lane kernel: both a_zero_point and b_zero_point set to 0,
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(cur_k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5711 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5712 
5713 
5714 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// 2x16c4 NEON dot-product kernel: full 2x16 tile with k of 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5727 
// 2x16c4 NEON dot-product kernel: full 2x16 tile, k of 8, cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5741 
// 2x16c4 NEON dot-product kernel: k of 8, n from 1 to 16, m from 1 to 2,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5759 
// 2x16c4 NEON dot-product kernel: k of 8, n fixed at 16, m from 1 to 2,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(cur_m).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5775 
// 2x16c4 NEON dot-product kernel: k of 8, m fixed at 2, n from 1 to 16,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5791 
// m = 2, n = 16; sweeps k over every value below 8 (1 through 7).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5806 
// Sweeps k over 1..7 and, for each k, every (m, n) pair with n in [1, 16]
// and m in [1, 2]; iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5826 
// m = 2, n = 16; sweeps k over 9 through 15.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5841 
// Sweeps k over 9..15 and, for each k, every (m, n) pair with n in [1, 16]
// and m in [1, 2]; iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5861 
// m = 2, n = 16; sweeps k over multiples of 8 from 16 up to and including 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5876 
// Sweeps k over multiples of 8 in [16, 80] and, for each k, every (m, n)
// pair with n in [1, 16] and m in [1, 2]; iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5896 
// m = 2; sweeps n over 17..31 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5913 
// Same n (17..31) and k (1..40 step 9) sweep as n_gt_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5931 
// Sweeps n over 17..31, k over 1..40 step 9, and m over [1, 2];
// iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5951 
// m = 2; n takes the values 32 and 48, each with k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5968 
// Same n (32, 48) and k (1..40 step 9) sweep as n_div_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5986 
// n takes 32 and 48, k runs 1..40 step 9, m runs over [1, 2];
// iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6006 
// m = 2, n = 16, ks(3); sweeps k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6022 
// ks(3); sweeps k over 1..40 step 9 and every (m, n) pair with n in [1, 16]
// and m in [1, 2]; iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6043 
// m = 2, ks(3); sweeps n over 17..31 and k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6061 
// m = 2, ks(3); n takes 32 and 48, each with k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6079 
// cm_stride(19); sweeps k over 1..40 step 9 and every (m, n) pair with
// n in [1, 16] and m in [1, 2]; iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6100 
// m = 2, n = 16, ks(3), a_offset(83); sweeps k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6117 
// m = 2, n = 16, ks(3), a_offset(83); for each k in 1..40 step 9, runs once
// per zero_index value in {0, 1}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(16).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6137 
// Single run with m = 2, n = 16, k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
6151 
// Single run with m = 2, n = 16, k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
6165 
// Single run with m = 2, n = 16, k = 8 and cm_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
6179 
// m = 2, n = 16, a_zero_point(0); sweeps k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6195 
// m = 2, n = 16, b_zero_point(0); sweeps k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6211 
// m = 2, n = 16, with both a_zero_point(0) and b_zero_point(0);
// sweeps k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6228 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
6229 
6230 
6231 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single run with m = 2, n = 32 (equal to mr and nr) and k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(32).kr(4).sr(1)
    .m(2).n(32).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
6244 
// Single run with m = 2, n = 32, k = 8 and cn_stride(37).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(32).kr(4).sr(1)
    .m(2).n(32).k(8)
    .cn_stride(37)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
6258 
// k = 8; exercises every (m, n) pair with n in [1, 32] and m in [1, 2],
// using iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 32; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(32).kr(4).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6276 
// k = 8 and n = 32; varies only m over [1, 2], using iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(32).kr(4).sr(1)
      .m(m_size).n(32).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6292 
// k = 8 and m = 2; varies only n over [1, 32], using iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 1; n_size <= 32; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(32).kr(4).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6308 
// m = 2, n = 32; sweeps k over every value below 8 (1 through 7).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(32).kr(4).sr(1)
      .m(2).n(32).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6323 
// Sweeps k over 1..7 and, for each k, every (m, n) pair with n in [1, 32]
// and m in [1, 2]; iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 32; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(32).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6343 
// m = 2, n = 32; sweeps k over 9 through 15.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(32).kr(4).sr(1)
      .m(2).n(32).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6358 
// Sweeps k over 9..15 and, for each k, every (m, n) pair with n in [1, 32]
// and m in [1, 2]; iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 32; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(32).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6378 
// m = 2, n = 32; sweeps k over multiples of 8 from 16 up to and including 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(32).kr(4).sr(1)
      .m(2).n(32).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6393 
// Sweeps k over multiples of 8 in [16, 80] and, for each k, every (m, n)
// pair with n in [1, 32] and m in [1, 2]; iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 32; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(32).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6413 
// m = 2; sweeps n over 33..63 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 33; n_size < 64; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(32).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6430 
// Same n (33..63) and k (1..40 step 9) sweep as n_gt_32, with cn_stride(37).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 33; n_size < 64; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(32).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(37)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6448 
// Sweeps n over 33..63, k over 1..40 step 9, and m over [1, 2];
// iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 33; n_size < 64; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(32).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6468 
// m = 2; n takes the values 64 and 96, each with k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 64; n_size <= 96; n_size += 32) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(32).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6485 
// Same n (64, 96) and k (1..40 step 9) sweep as n_div_32, with cn_stride(37).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 64; n_size <= 96; n_size += 32) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(32).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(37)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6503 
// n takes 64 and 96, k runs 1..40 step 9, m runs over [1, 2];
// iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 64; n_size <= 96; n_size += 32) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(32).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6523 
// m = 2, n = 32, ks(3); sweeps k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(32).kr(4).sr(1)
      .m(2).n(32).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6539 
// ks(3); sweeps k over 1..40 step 9 and every (m, n) pair with n in [1, 32]
// and m in [1, 2]; iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 32; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(32).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6560 
// m = 2, ks(3); sweeps n over 33..63 and k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 33; n_size < 64; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(32).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6578 
// m = 2, ks(3); n takes 64 and 96, each with k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_size = 64; n_size <= 96; n_size += 32) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(32).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6596 
// cm_stride(37); sweeps k over 1..40 step 9 and every (m, n) pair with
// n in [1, 32] and m in [1, 2]; iterations(1) per combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 32; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(32).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(37)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6617 
// m = 2, n = 32, ks(3), a_offset(83); sweeps k over 1..40 step 9.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(32).kr(4).sr(1)
      .m(2).n(32).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6634 
// m = 2, n = 32, ks(3), a_offset(83); for each k in 1..40 step 9, runs once
// per zero_index value in {0, 1}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(32).kr(4).sr(1)
        .m(2).n(32).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6654 
// Single run with m = 2, n = 32, k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(32).kr(4).sr(1)
    .m(2).n(32).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
6668 
// 2x32c4 NEONDOT: single run with m = 2, n = 32, k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(32).kr(4).sr(1);
  tester.m(2).n(32).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
6682 
// 2x32c4 NEONDOT: single run with m = 2, n = 32, k = 8 and cm_stride(37).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(32).kr(4).sr(1);
  tester.m(2).n(32).k(8);
  tester.cm_stride(37);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
6696 
// 2x32c4 NEONDOT: k takes 1, 10, 19, 28, 37 with m = 2, n = 32 and a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(32).kr(4).sr(1);
    tester.m(2).n(32).k(k);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6712 
// 2x32c4 NEONDOT: k takes 1, 10, 19, 28, 37 with m = 2, n = 32 and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(32).kr(4).sr(1);
    tester.m(2).n(32).k(k);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6728 
// 2x32c4 NEONDOT: k takes 1, 10, 19, 28, 37 with m = 2, n = 32 and both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(32).kr(4).sr(1);
    tester.m(2).n(32).k(k);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6745 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
6746 
6747 
6748 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 3x16 NEON MLAL-lane: single run with m = 3, n = 16 and k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
6761 
// 3x16 NEON MLAL-lane: single run with m = 3, n = 16, k = 8 and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
6775 
// 3x16 NEON MLAL-lane: k = 8; every n in [1, 16] is combined with every m in [1, 3],
// each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(m).n(n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6793 
// 3x16 NEON MLAL-lane: k = 8 and n = 16; m runs over [1, 3] with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(m).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6809 
// 3x16 NEON MLAL-lane: k = 8 and m = 3; n runs over [1, 16] with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(n).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6825 
// 3x16 NEON MLAL-lane: m = 3, n = 16; k runs over [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6840 
// 3x16 NEON MLAL-lane: k in [1, 7] crossed with n in [1, 16] and m in [1, 3],
// each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6860 
// 3x16 NEON MLAL-lane: m = 3, n = 16; k runs over [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6875 
// 3x16 NEON MLAL-lane: k in [9, 15] crossed with n in [1, 16] and m in [1, 3],
// each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6895 
// 3x16 NEON MLAL-lane: m = 3, n = 16; k runs over 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6910 
// 3x16 NEON MLAL-lane: k in 16, 24, ..., 80 crossed with n in [1, 16] and m in [1, 3],
// each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6930 
// 3x16 NEON MLAL-lane: m = 3; n in [17, 31] crossed with k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6947 
// 3x16 NEON MLAL-lane: m = 3 and cn_stride(19); n in [17, 31] crossed with
// k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6965 
// 3x16 NEON MLAL-lane: n in [17, 31] crossed with k in 1, 10, 19, 28, 37 and
// m in [1, 3], each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6985 
// 3x16 NEON MLAL-lane: m = 3; n in {32, 48} crossed with k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7002 
// 3x16 NEON MLAL-lane: m = 3 and cn_stride(19); n in {32, 48} crossed with
// k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7020 
// 3x16 NEON MLAL-lane: n in {32, 48} crossed with k in 1, 10, 19, 28, 37 and
// m in [1, 3], each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7040 
// 3x16 NEON MLAL-lane: m = 3, n = 16 and ks(3); k takes 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7056 
// 3x16 NEON MLAL-lane: ks(3); k in 1, 10, 19, 28, 37 crossed with n in [1, 16]
// and m in [1, 3], each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7077 
// 3x16 NEON MLAL-lane: m = 3 and ks(3); n in [17, 31] crossed with
// k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7095 
// 3x16 NEON MLAL-lane: m = 3 and ks(3); n in {32, 48} crossed with
// k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(n).k(k);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7113 
// 3x16 NEON MLAL-lane: cm_stride(19); k in 1, 10, 19, 28, 37 crossed with
// n in [1, 16] and m in [1, 3], each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7134 
// 3x16 NEON MLAL-lane: k takes 1, 10, 19, 28, 37 with m = 3, n = 16, ks(3)
// and a_offset(127).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7151 
// 3x16 NEON MLAL-lane: for each k in 1, 10, 19, 28, 37 runs zero_index 0, 1 and 2
// with m = 3, n = 16, ks(3) and a_offset(127).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; ++mz) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(16).k(k);
      tester.ks(3).a_offset(127).zero_index(mz);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7171 
// 3x16 NEON MLAL-lane: single run with m = 3, n = 16, k = 8 and qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7185 
// 3x16 NEON MLAL-lane: single run with m = 3, n = 16, k = 8 and qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7199 
// 3x16 NEON MLAL-lane: single run with m = 3, n = 16, k = 8 and cm_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7213 
// 3x16 NEON MLAL-lane: k takes 1, 10, 19, 28, 37 with m = 3, n = 16 and a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7229 
// 3x16 NEON MLAL-lane: k takes 1, 10, 19, 28, 37 with m = 3, n = 16 and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7245 
// 3x16 NEON MLAL-lane: k takes 1, 10, 19, 28, 37 with m = 3, n = 16 and both
// a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(k);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7262 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7263 
7264 
7265 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// 3x16c4 NEONDOT: single run with m = 3, n = 16 and k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(4).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7278 
// 3x16c4 NEONDOT: single run with m = 3, n = 16, k = 8 and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(4).sr(1);
  tester.m(3).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7292 
// 3x16c4 NEONDOT: k = 8; every n in [1, 16] is combined with every m in [1, 3],
// each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(4).sr(1);
      tester.m(m).n(n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7310 
// 3x16c4 NEONDOT: k = 8 and n = 16; m runs over [1, 3] with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(4).sr(1);
    tester.m(m).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7326 
// 3x16c4 NEONDOT: k = 8 and m = 3; n runs over [1, 16] with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(4).sr(1);
    tester.m(3).n(n).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7342 
// 3x16c4 NEONDOT: m = 3, n = 16; k runs over [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(4).sr(1);
    tester.m(3).n(16).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7357 
// 3x16c4 NEONDOT: k in [1, 7] crossed with n in [1, 16] and m in [1, 3],
// each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(4).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7377 
// 3x16c4 NEONDOT: m = 3, n = 16; k runs over [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(4).sr(1);
    tester.m(3).n(16).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7392 
// 3x16c4 NEONDOT: k in [9, 15] crossed with n in [1, 16] and m in [1, 3],
// each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(4).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7412 
// 3x16c4 NEONDOT: m = 3, n = 16; k runs over 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(4).sr(1);
    tester.m(3).n(16).k(k);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7427 
// 3x16c4 NEONDOT: k in 16, 24, ..., 80 crossed with n in [1, 16] and m in [1, 3],
// each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(4).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7447 
// 3x16c4 NEONDOT: m = 3; n in [17, 31] crossed with k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(4).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7464 
// 3x16c4 NEONDOT: m = 3 and cn_stride(19); n in [17, 31] crossed with
// k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(4).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7482 
// 3x16c4 NEONDOT: n in [17, 31] crossed with k in 1, 10, 19, 28, 37 and
// m in [1, 3], each configuration with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(4).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7502 
// 3x16c4 NEONDOT: m = 3; n in {32, 48} crossed with k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(4).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7519 
// 3x16c4 NEONDOT: m = 3 and cn_stride(19); n in {32, 48} crossed with
// k in 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(4).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7537 
// Runs the 3x16c4 NEONDOT kernel for n in {32, 48}, k in {1, 10, 19, 28, 37}
// and every m in 1..3, with a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7557 
// Runs the full 3x16 tile with ks = 3 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(16).k(depth).ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7573 
// Runs the 3x16c4 NEONDOT kernel with ks = 3 for k in {1, 10, 19, 28, 37},
// every n in 1..16 and every m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7594 
// Runs the 3x16c4 NEONDOT kernel with m = 3 and ks = 3 for n = 17..31 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols <= 31; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7612 
// Runs the 3x16c4 NEONDOT kernel with m = 3 and ks = 3 for n in {32, 48} and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7630 
// Runs the 3x16c4 NEONDOT kernel with cm_stride = 19 for
// k in {1, 10, 19, 28, 37}, every n in 1..16 and every m in 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(rows).n(cols).k(depth).cm_stride(19).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7651 
// Runs the full 3x16 tile with ks = 3 and a_offset = 127 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(16).k(depth).ks(3).a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7668 
// Runs the full 3x16 tile with ks = 3 and a_offset = 127 for
// k in {1, 10, 19, 28, 37}, passing each zero_index in 0..2 in turn.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row <= 2; ++zero_row) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(16).k(depth).ks(3).a_offset(127).zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7688 
// Runs the full 3x16 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester =
      GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(16).k(8).qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7702 
// Runs the full 3x16 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester =
      GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(16).k(8).qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7716 
// Runs the full 3x16 tile at k = 8 with cm_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester =
      GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(16).k(8).cm_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7730 
// Runs the full 3x16 tile with a_zero_point = 0 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(16).k(depth).a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7746 
// Runs the full 3x16 tile with b_zero_point = 0 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(16).k(depth).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7762 
// Runs the full 3x16 tile with both a_zero_point and b_zero_point set to 0
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(16).kr(4).sr(1).m(3).n(16).k(depth).a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7779 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
7780 
7781 
7782 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Runs the 3x32c4 NEONDOT kernel on the full 3x32 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester =
      GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7795 
// Runs the full 3x32 tile at k = 8 with cn_stride set to 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester =
      GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(8).cn_stride(37);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7809 
// Runs the 3x32c4 NEONDOT kernel at k = 8 for every n in 1..32 and every
// m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 32; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(rows).n(cols).k(8).iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7827 
// Runs the 3x32c4 NEONDOT kernel at k = 8, n = 32 for every m in 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(rows).n(32).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7843 
// Runs the 3x32c4 NEONDOT kernel at k = 8, m = 3 for every n in 1..32,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 32; ++cols) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(cols).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7859 
// Runs the full 3x32 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7874 
// Runs the 3x32c4 NEONDOT kernel for every k in 1..7, every n in 1..32 and
// every m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 32; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7894 
// Runs the full 3x32 tile for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7909 
// Runs the 3x32c4 NEONDOT kernel for every k in 9..15, every n in 1..32 and
// every m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 32; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7929 
// Runs the full 3x32 tile for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7944 
// Runs the 3x32c4 NEONDOT kernel for k = 16, 24, ..., 80, every n in 1..32
// and every m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 32; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7964 
// Runs the 3x32c4 NEONDOT kernel with m = 3 for n = 33..63 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 33; cols <= 63; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7981 
// Same sweep as n_gt_32 (n = 33..63, k in {1, 10, 19, 28, 37}, m = 3),
// but with cn_stride set to 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 33; cols <= 63; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(cols).k(depth).cn_stride(37);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7999 
// Runs the 3x32c4 NEONDOT kernel for n = 33..63, k in {1, 10, 19, 28, 37}
// and every m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 33; cols <= 63; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8019 
// Runs the 3x32c4 NEONDOT kernel with m = 3 for n in {64, 96} and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 64; cols <= 96; cols += 32) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(cols).k(depth);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
8036 
// Same sweep as n_div_32 (n in {64, 96}, k in {1, 10, 19, 28, 37}, m = 3),
// but with cn_stride set to 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 64; cols <= 96; cols += 32) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(cols).k(depth).cn_stride(37);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
8054 
// Runs the 3x32c4 NEONDOT kernel for n in {64, 96}, k in {1, 10, 19, 28, 37}
// and every m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 64; cols <= 96; cols += 32) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8074 
// Runs the full 3x32 tile with ks = 3 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(depth).ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8090 
// Runs the 3x32c4 NEONDOT kernel with ks = 3 for k in {1, 10, 19, 28, 37},
// every n in 1..32 and every m in 1..3, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 32; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8111 
// Runs the 3x32c4 NEONDOT kernel with m = 3 and ks = 3 for n = 33..63 and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 33; cols <= 63; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
8129 
// Runs the 3x32c4 NEONDOT kernel with m = 3 and ks = 3 for n in {64, 96} and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 64; cols <= 96; cols += 32) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
8147 
// Runs the 3x32c4 NEONDOT kernel with cm_stride = 37 for
// k in {1, 10, 19, 28, 37}, every n in 1..32 and every m in 1..3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 32; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(rows).n(cols).k(depth).cm_stride(37).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8168 
// Runs the full 3x32 tile with ks = 3 and a_offset = 127 for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(depth).ks(3).a_offset(127);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8185 
// Runs the full 3x32 tile with ks = 3 and a_offset = 127 for
// k in {1, 10, 19, 28, 37}, passing each zero_index in 0..2 in turn.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row <= 2; ++zero_row) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(depth).ks(3).a_offset(127).zero_index(zero_row);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
8205 
// Runs the full 3x32 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester =
      GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(8).qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
8219 
// Runs the full 3x32 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester =
      GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(8).qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
8233 
// Runs the full 3x32 tile at k = 8 with cm_stride set to 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester =
      GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(8).cm_stride(37);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
8247 
// Runs the full 3x32 tile with a_zero_point = 0 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(depth).a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8263 
// Runs the full 3x32 tile with b_zero_point = 0 for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(depth).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8279 
// Runs the full 3x32 tile with both a_zero_point and b_zero_point set to 0
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(3).nr(32).kr(4).sr(1).m(3).n(32).k(depth).a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8296 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
8297 
8298 
8299 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 4x8 NEON MLAL-lane kernel on the full 4x8 tile at k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester =
      GemmMicrokernelTester().mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
8312 
// Runs the full 4x8 tile at k = 8 with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester =
      GemmMicrokernelTester().mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(8).cn_stride(11);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
8326 
// Runs the 4x8 NEON MLAL-lane kernel at k = 8 for every n in 1..8 and every
// m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester =
          GemmMicrokernelTester().mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(8).iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
8344 
// Runs the 4x8 NEON MLAL-lane kernel at k = 8, n = 8 for every m in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(4).nr(8).kr(1).sr(1).m(rows).n(8).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8360 
// Runs the 4x8 NEON MLAL-lane kernel at k = 8, m = 4 for every n in 1..8,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(4).nr(8).kr(1).sr(1).m(4).n(cols).k(8).iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8376 
// Runs the full 4x8 tile for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester tester =
        GemmMicrokernelTester().mr(4).nr(8).kr(1).sr(1).m(4).n(8).k(depth);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8391 
// Runs the 4x8 NEON MLAL-lane kernel for every k in 1..7, every n in 1..8
// and every m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester =
            GemmMicrokernelTester().mr(4).nr(8).kr(1).sr(1).m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8411 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4 and n = 8 are fixed; k sweeps 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8426 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every (k, n, m) with k in 9..15, n in 1..8, m in 1..4; one iteration each.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8446 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4 and n = 8 are fixed; k takes the multiples of 8 from 16 through 80.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8461 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k takes the multiples of 8 from 16 through 80; n in 1..8, m in 1..4; one iteration each.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8481 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4 is fixed; n sweeps 9..15 and k steps 1, 10, ..., 37.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
8498 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Same n/k sweep as n_gt_8 (n in 9..15, k = 1, 10, ..., 37), with cn_stride set to 11.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
8516 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15, k = 1, 10, ..., 37, m in 1..4; one iteration per configuration.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8536 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4 is fixed; n takes 16 and 24, k steps 1, 10, ..., 37.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
8553 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n takes 16 and 24, k steps 1, 10, ..., 37, with cn_stride set to 11.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
8571 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n takes 16 and 24, k steps 1, 10, ..., 37, m in 1..4; one iteration per configuration.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8591 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4, n = 8 and ks = 3 are fixed; k steps 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8607 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3; k steps 1, 10, ..., 37, n in 1..8, m in 1..4; one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8628 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4 and ks = 3 are fixed; n sweeps 9..15 and k steps 1, 10, ..., 37.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
8646 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4 and ks = 3 are fixed; n takes 16 and 24, k steps 1, 10, ..., 37.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
8664 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride = 11; k steps 1, 10, ..., 37, n in 1..8, m in 1..4; one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8685 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4, n = 8, ks = 3 and a_offset = 163 are fixed; k steps 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8702 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 and a_offset = 163; k steps 1, 10, ..., 37 and zero_index sweeps 0..3.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(k_size);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
8722 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single 4x8 configuration at k = 8 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
8736 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single 4x8 configuration at k = 8 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
8750 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single 4x8 configuration at k = 8 with cm_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
8764 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  // a_zero_point = 0 with m = 4, n = 8; k steps 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8780 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  // b_zero_point = 0 with m = 4, n = 8; k steps 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8796 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  // Both a_zero_point and b_zero_point are 0; m = 4, n = 8; k steps 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(k_size);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8813 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8814 
8815 
8816 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single 4x16 configuration at k = 8.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
8829 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single 4x16 configuration at k = 8 with cn_stride set to 19.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
8843 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k = 8 is fixed; n in 1..16 and m in 1..4, one iteration per configuration.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
8861 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n = 16 and k = 8 are fixed; m sweeps 1..4, one iteration per configuration.
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(m_size).n(16).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8877 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4 and k = 8 are fixed; n sweeps 1..16, one iteration per configuration.
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8893 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4 and n = 16 are fixed; k sweeps 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8908 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Every (k, n, m) with k in 1..7, n in 1..16, m in 1..4; one iteration each.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8928 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4 and n = 16 are fixed; k sweeps 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8943 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Every (k, n, m) with k in 9..15, n in 1..16, m in 1..4; one iteration each.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8963 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4 and n = 16 are fixed; k takes the multiples of 8 from 16 through 80.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
8978 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // k takes the multiples of 8 from 16 through 80; n in 1..16, m in 1..4; one iteration each.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8998 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4 is fixed; n sweeps 17..31 and k steps 1, 10, ..., 37.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
9015 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n sweeps 17..31 and k steps 1, 10, ..., 37, with cn_stride set to 19.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
9033 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n in 17..31, k = 1, 10, ..., 37, m in 1..4; one iteration per configuration.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9053 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4 is fixed; n takes 32 and 48, k steps 1, 10, ..., 37.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
9070 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n takes 32 and 48, k steps 1, 10, ..., 37, with cn_stride set to 19.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
9088 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // n takes 32 and 48, k steps 1, 10, ..., 37, m in 1..4; one iteration per configuration.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9108 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16 and ks = 3 are fixed; k steps 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
9124 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // ks = 3; k steps 1, 10, ..., 37, n in 1..16, m in 1..4; one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9145 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4 and ks = 3 are fixed; n sweeps 17..31 and k steps 1, 10, ..., 37.
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
9163 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4 and ks = 3 are fixed; n takes 32 and 48, k steps 1, 10, ..., 37.
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
9181 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // cm_stride = 19; k steps 1, 10, ..., 37, n in 1..16, m in 1..4; one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(19).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9202 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // m = 4, n = 16, ks = 3 and a_offset = 163 are fixed; k steps 1, 10, ..., 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
9219 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // ks = 3 and a_offset = 163; k steps 1, 10, ..., 37 and zero_index sweeps 0..3.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(16).k(k_size);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
9239 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single 4x16 configuration at k = 8 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
9253 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single 4x16 configuration at k = 8 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
9267 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  // Single 4x16 configuration at k = 8 with cm_stride set to 19.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
9281 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_a_zero_point) {
  // Full tile with a_zero_point(0); k takes the values 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9297 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_b_zero_point) {
  // Full tile with b_zero_point(0); k takes the values 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9313 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_zero_point) {
  // Full tile with both a_zero_point(0) and b_zero_point(0);
  // k takes the values 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9330 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
9331 
9332 
9333 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8) {
  // Full tile (m == mr == 5, n == nr == 16) at k = 8.
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(16).kr(4).sr(1);
  tester.m(5).n(16).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9346 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, strided_cn) {
  // Full tile (m == mr == 5, n == nr == 16) at k = 8 with cn_stride(19).
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(16).kr(4).sr(1);
  tester.m(5).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9360 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8_subtile) {
  // Every (m, n) with m in [1, 5] and n in [1, 16] at k = 8, iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(4).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9378 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8_subtile_m) {
  // m swept over [1, 5] with n = 16 and k = 8, iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(4).sr(1);
    tester.m(mc).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9394 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8_subtile_n) {
  // n swept over [1, 16] with m = 5 and k = 8, iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(4).sr(1);
    tester.m(5).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9410 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_lt_8) {
  // Full tile (m = 5, n = 16) with k swept over [1, 7].
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(4).sr(1);
    tester.m(5).n(16).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9425 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_lt_8_subtile) {
  // k in [1, 7] crossed with n in [1, 16] and m in [1, 5], iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9445 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_gt_8) {
  // Full tile (m = 5, n = 16) with k swept over [9, 15].
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(4).sr(1);
    tester.m(5).n(16).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9460 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_gt_8_subtile) {
  // k in [9, 15] crossed with n in [1, 16] and m in [1, 5], iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9480 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_div_8) {
  // Full tile (m = 5, n = 16) with k = 16, 24, ..., 80 (step 8).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(4).sr(1);
    tester.m(5).n(16).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9495 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_div_8_subtile) {
  // k = 16, 24, ..., 80 crossed with n in [1, 16] and m in [1, 5],
  // iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9515 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16) {
  // n in [17, 31] (above nr = 16) with m = 5; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(4).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9532 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16_strided_cn) {
  // n in [17, 31] with m = 5 and cn_stride(19); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(4).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9550 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16_subtile) {
  // n in [17, 31] crossed with k in {1, 10, 19, 28, 37} and m in [1, 5],
  // iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9570 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16) {
  // n = 32 and 48 (multiples of nr = 16) with m = 5;
  // k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(4).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9587 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16_strided_cn) {
  // n = 32 and 48 with m = 5 and cn_stride(19); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(4).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9605 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16_subtile) {
  // n = 32 and 48 crossed with k in {1, 10, 19, 28, 37} and m in [1, 5],
  // iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9625 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, small_kernel) {
  // Full tile (m = 5, n = 16) with ks(3); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(4).sr(1);
    tester.m(5).n(16).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9641 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, small_kernel_subtile) {
  // ks(3) with k in {1, 10, 19, 28, 37} crossed with n in [1, 16] and
  // m in [1, 5], iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9662 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16_small_kernel) {
  // n in [17, 31] with m = 5 and ks(3); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(4).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9680 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16_small_kernel) {
  // n = 32 and 48 with m = 5 and ks(3); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(4).sr(1);
      tester.m(5).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9698 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, strided_cm_subtile) {
  // cm_stride(19) with k in {1, 10, 19, 28, 37} crossed with n in [1, 16]
  // and m in [1, 5], iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(16).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9719 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, a_offset) {
  // Full tile (m = 5, n = 16) with ks(3) and a_offset(211);
  // k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(4).sr(1);
    tester.m(5).n(16).k(kc);
    tester.ks(3).a_offset(211);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9736 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, zero) {
  // Full tile with ks(3) and a_offset(211); zero_index is swept over [0, 4]
  // for each k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 5; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(16).kr(4).sr(1);
      tester.m(5).n(16).k(kc);
      tester.ks(3).a_offset(211).zero_index(zi);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9756 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, qmin) {
  // Full tile (m == mr == 5, n == nr == 16) at k = 8 with qmin(128).
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(16).kr(4).sr(1);
  tester.m(5).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9770 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, qmax) {
  // Full tile (m == mr == 5, n == nr == 16) at k = 8 with qmax(128).
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(16).kr(4).sr(1);
  tester.m(5).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9784 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, strided_cm) {
  // Full tile (m == mr == 5, n == nr == 16) at k = 8 with cm_stride(19).
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(16).kr(4).sr(1);
  tester.m(5).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9798 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, no_a_zero_point) {
  // Full tile with a_zero_point(0); k takes the values 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(4).sr(1);
    tester.m(5).n(16).k(kc);
    tester.a_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9814 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, no_b_zero_point) {
  // Full tile with b_zero_point(0); k takes the values 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(4).sr(1);
    tester.m(5).n(16).k(kc);
    tester.b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9830 
TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, no_zero_point) {
  // Full tile with both a_zero_point(0) and b_zero_point(0);
  // k takes the values 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(16).kr(4).sr(1);
    tester.m(5).n(16).k(kc);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9847 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
9848 
9849 
9850 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8) {
  // Full tile (m == mr == 6, n == nr == 8) at k = 8.
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(4).sr(1);
  tester.m(6).n(8).k(8);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9863 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cn) {
  // Full tile (m == mr == 6, n == nr == 8) at k = 8 with cn_stride(11).
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(6).nr(8).kr(4).sr(1);
  tester.m(6).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9877 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile) {
  // Every (m, n) with m in [1, 6] and n in [1, 8] at k = 8, iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9895 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_m) {
  // m swept over [1, 6] with n = 8 and k = 8, iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(mc).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9911 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_n) {
  // n swept over [1, 8] with m = 6 and k = 8, iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9927 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8) {
  // Full tile (m = 6, n = 8) with k swept over [1, 7].
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9942 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8_subtile) {
  // k in [1, 7] crossed with n in [1, 8] and m in [1, 6], iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9962 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8) {
  // Full tile (m = 6, n = 8) with k swept over [9, 15].
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9977 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8_subtile) {
  // k in [9, 15] crossed with n in [1, 8] and m in [1, 6], iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9997 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8) {
  // Full tile (m = 6, n = 8) with k = 16, 24, ..., 80 (step 8).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
10012 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8_subtile) {
  // k = 16, 24, ..., 80 crossed with n in [1, 8] and m in [1, 6],
  // iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
10032 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8) {
  // n in [9, 15] (above nr = 8) with m = 6; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10049 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_strided_cn) {
  // n in [9, 15] with m = 6 and cn_stride(11); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10067 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_subtile) {
  // n in [9, 15] crossed with k in {1, 10, 19, 28, 37} and m in [1, 6],
  // iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
10087 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8) {
  // n = 16 and 24 (multiples of nr = 8) with m = 6;
  // k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10104 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_strided_cn) {
  // n = 16 and 24 with m = 6 and cn_stride(11); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10122 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_subtile) {
  // n = 16 and 24 crossed with k in {1, 10, 19, 28, 37} and m in [1, 6],
  // iterations(1) each.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
10142 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, small_kernel) {
  // Full tile (m = 6, n = 8) with ks(3); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(8).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
10158 
// 6x8c4__neondot ukernel: sweeps k in {1, 10, 19, 28, 37}, n in 1..8 and m in 1..6 with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,small_kernel_subtile)10159   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, small_kernel_subtile) {
10160     TEST_REQUIRES_ARM_NEON_DOT;
10161     for (size_t k = 1; k <= 40; k += 9) {
10162       for (uint32_t n = 1; n <= 8; n++) {
10163         for (uint32_t m = 1; m <= 6; m++) {
10164           GemmMicrokernelTester()
10165             .mr(6)
10166             .nr(8)
10167             .kr(4)
10168             .sr(1)
10169             .m(m)
10170             .n(n)
10171             .k(k)
10172             .ks(3)
10173             .iterations(1)
10174             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10175         }
10176       }
10177     }
10178   }
10179 
// 6x8c4__neondot ukernel, m(6) == mr: sweeps n in 9..15 and k in {1, 10, 19, 28, 37} with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,n_gt_8_small_kernel)10180   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_small_kernel) {
10181     TEST_REQUIRES_ARM_NEON_DOT;
10182     for (uint32_t n = 9; n < 16; n++) {
10183       for (size_t k = 1; k <= 40; k += 9) {
10184         GemmMicrokernelTester()
10185           .mr(6)
10186           .nr(8)
10187           .kr(4)
10188           .sr(1)
10189           .m(6)
10190           .n(n)
10191           .k(k)
10192           .ks(3)
10193           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10194       }
10195     }
10196   }
10197 
// 6x8c4__neondot ukernel, m(6) == mr: sweeps n in {16, 24} and k in {1, 10, 19, 28, 37} with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,n_div_8_small_kernel)10198   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_small_kernel) {
10199     TEST_REQUIRES_ARM_NEON_DOT;
10200     for (uint32_t n = 16; n <= 24; n += 8) {
10201       for (size_t k = 1; k <= 40; k += 9) {
10202         GemmMicrokernelTester()
10203           .mr(6)
10204           .nr(8)
10205           .kr(4)
10206           .sr(1)
10207           .m(6)
10208           .n(n)
10209           .k(k)
10210           .ks(3)
10211           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10212       }
10213     }
10214   }
10215 
// 6x8c4__neondot ukernel: sweeps k in {1, 10, 19, 28, 37}, n in 1..8 and m in 1..6 with cm_stride(11) and iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,strided_cm_subtile)10216   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm_subtile) {
10217     TEST_REQUIRES_ARM_NEON_DOT;
10218     for (size_t k = 1; k <= 40; k += 9) {
10219       for (uint32_t n = 1; n <= 8; n++) {
10220         for (uint32_t m = 1; m <= 6; m++) {
10221           GemmMicrokernelTester()
10222             .mr(6)
10223             .nr(8)
10224             .kr(4)
10225             .sr(1)
10226             .m(m)
10227             .n(n)
10228             .k(k)
10229             .cm_stride(11)
10230             .iterations(1)
10231             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10232         }
10233       }
10234     }
10235   }
10236 
// 6x8c4__neondot ukernel, m(6) x n(8): sweeps k in {1, 10, 19, 28, 37} with ks(3) and a_offset(251).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,a_offset)10237   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, a_offset) {
10238     TEST_REQUIRES_ARM_NEON_DOT;
10239     for (size_t k = 1; k <= 40; k += 9) {
10240       GemmMicrokernelTester()
10241         .mr(6)
10242         .nr(8)
10243         .kr(4)
10244         .sr(1)
10245         .m(6)
10246         .n(8)
10247         .k(k)
10248         .ks(3)
10249         .a_offset(251)
10250         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10251     }
10252   }
10253 
// 6x8c4__neondot ukernel, m(6) x n(8): sweeps k in {1, 10, 19, 28, 37} and zero_index(mz) for mz in 0..5,
// with ks(3) and a_offset(251).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,zero)10254   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, zero) {
10255     TEST_REQUIRES_ARM_NEON_DOT;
10256     for (size_t k = 1; k <= 40; k += 9) {
10257       for (uint32_t mz = 0; mz < 6; mz++) {
10258         GemmMicrokernelTester()
10259           .mr(6)
10260           .nr(8)
10261           .kr(4)
10262           .sr(1)
10263           .m(6)
10264           .n(8)
10265           .k(k)
10266           .ks(3)
10267           .a_offset(251)
10268           .zero_index(mz)
10269           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10270       }
10271     }
10272   }
10273 
// 6x8c4__neondot ukernel: single case m(6) x n(8) x k(8) with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,qmin)10274   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmin) {
10275     TEST_REQUIRES_ARM_NEON_DOT;
10276     GemmMicrokernelTester()
10277       .mr(6)
10278       .nr(8)
10279       .kr(4)
10280       .sr(1)
10281       .m(6)
10282       .n(8)
10283       .k(8)
10284       .qmin(128)
10285       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10286   }
10287 
// 6x8c4__neondot ukernel: single case m(6) x n(8) x k(8) with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,qmax)10288   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmax) {
10289     TEST_REQUIRES_ARM_NEON_DOT;
10290     GemmMicrokernelTester()
10291       .mr(6)
10292       .nr(8)
10293       .kr(4)
10294       .sr(1)
10295       .m(6)
10296       .n(8)
10297       .k(8)
10298       .qmax(128)
10299       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10300   }
10301 
// 6x8c4__neondot ukernel: single case m(6) x n(8) x k(8) with cm_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,strided_cm)10302   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm) {
10303     TEST_REQUIRES_ARM_NEON_DOT;
10304     GemmMicrokernelTester()
10305       .mr(6)
10306       .nr(8)
10307       .kr(4)
10308       .sr(1)
10309       .m(6)
10310       .n(8)
10311       .k(8)
10312       .cm_stride(11)
10313       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10314   }
10315 
// 6x8c4__neondot ukernel, m(6) x n(8): sweeps k in {1, 10, 19, 28, 37} with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,no_a_zero_point)10316   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_a_zero_point) {
10317     TEST_REQUIRES_ARM_NEON_DOT;
10318     for (size_t k = 1; k <= 40; k += 9) {
10319       GemmMicrokernelTester()
10320         .mr(6)
10321         .nr(8)
10322         .kr(4)
10323         .sr(1)
10324         .m(6)
10325         .n(8)
10326         .k(k)
10327         .a_zero_point(0)
10328         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10329     }
10330   }
10331 
// 6x8c4__neondot ukernel, m(6) x n(8): sweeps k in {1, 10, 19, 28, 37} with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,no_b_zero_point)10332   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_b_zero_point) {
10333     TEST_REQUIRES_ARM_NEON_DOT;
10334     for (size_t k = 1; k <= 40; k += 9) {
10335       GemmMicrokernelTester()
10336         .mr(6)
10337         .nr(8)
10338         .kr(4)
10339         .sr(1)
10340         .m(6)
10341         .n(8)
10342         .k(k)
10343         .b_zero_point(0)
10344         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10345     }
10346   }
10347 
// 6x8c4__neondot ukernel, m(6) x n(8): sweeps k in {1, 10, 19, 28, 37} with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,no_zero_point)10348   TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_zero_point) {
10349     TEST_REQUIRES_ARM_NEON_DOT;
10350     for (size_t k = 1; k <= 40; k += 9) {
10351       GemmMicrokernelTester()
10352         .mr(6)
10353         .nr(8)
10354         .kr(4)
10355         .sr(1)
10356         .m(6)
10357         .n(8)
10358         .k(k)
10359         .a_zero_point(0)
10360         .b_zero_point(0)
10361         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10362     }
10363   }
10364 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
10365 
10366 
10367 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// 8x8c4__neondot ukernel: single case m(8) x n(8) x k(8) with no extra tester options.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_eq_8)10368   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8) {
10369     TEST_REQUIRES_ARM_NEON_DOT;
10370     GemmMicrokernelTester()
10371       .mr(8)
10372       .nr(8)
10373       .kr(4)
10374       .sr(1)
10375       .m(8)
10376       .n(8)
10377       .k(8)
10378       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10379   }
10380 
// 8x8c4__neondot ukernel: single case m(8) x n(8) x k(8) with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,strided_cn)10381   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cn) {
10382     TEST_REQUIRES_ARM_NEON_DOT;
10383     GemmMicrokernelTester()
10384       .mr(8)
10385       .nr(8)
10386       .kr(4)
10387       .sr(1)
10388       .m(8)
10389       .n(8)
10390       .k(8)
10391       .cn_stride(11)
10392       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10393   }
10394 
// 8x8c4__neondot ukernel, k(8): sweeps every n in 1..8 and m in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_eq_8_subtile)10395   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile) {
10396     TEST_REQUIRES_ARM_NEON_DOT;
10397     for (uint32_t n = 1; n <= 8; n++) {
10398       for (uint32_t m = 1; m <= 8; m++) {
10399         GemmMicrokernelTester()
10400           .mr(8)
10401           .nr(8)
10402           .kr(4)
10403           .sr(1)
10404           .m(m)
10405           .n(n)
10406           .k(8)
10407           .iterations(1)
10408           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10409       }
10410     }
10411   }
10412 
// 8x8c4__neondot ukernel, n(8) x k(8): sweeps m in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_eq_8_subtile_m)10413   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_m) {
10414     TEST_REQUIRES_ARM_NEON_DOT;
10415     for (uint32_t m = 1; m <= 8; m++) {
10416       GemmMicrokernelTester()
10417         .mr(8)
10418         .nr(8)
10419         .kr(4)
10420         .sr(1)
10421         .m(m)
10422         .n(8)
10423         .k(8)
10424         .iterations(1)
10425         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10426     }
10427   }
10428 
// 8x8c4__neondot ukernel, m(8) x k(8): sweeps n in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_eq_8_subtile_n)10429   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_n) {
10430     TEST_REQUIRES_ARM_NEON_DOT;
10431     for (uint32_t n = 1; n <= 8; n++) {
10432       GemmMicrokernelTester()
10433         .mr(8)
10434         .nr(8)
10435         .kr(4)
10436         .sr(1)
10437         .m(8)
10438         .n(n)
10439         .k(8)
10440         .iterations(1)
10441         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10442     }
10443   }
10444 
// 8x8c4__neondot ukernel, m(8) x n(8): sweeps every k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_lt_8)10445   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8) {
10446     TEST_REQUIRES_ARM_NEON_DOT;
10447     for (size_t k = 1; k < 8; k++) {
10448       GemmMicrokernelTester()
10449         .mr(8)
10450         .nr(8)
10451         .kr(4)
10452         .sr(1)
10453         .m(8)
10454         .n(8)
10455         .k(k)
10456         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10457     }
10458   }
10459 
// 8x8c4__neondot ukernel: sweeps k in 1..7, n in 1..8 and m in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_lt_8_subtile)10460   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8_subtile) {
10461     TEST_REQUIRES_ARM_NEON_DOT;
10462     for (size_t k = 1; k < 8; k++) {
10463       for (uint32_t n = 1; n <= 8; n++) {
10464         for (uint32_t m = 1; m <= 8; m++) {
10465           GemmMicrokernelTester()
10466             .mr(8)
10467             .nr(8)
10468             .kr(4)
10469             .sr(1)
10470             .m(m)
10471             .n(n)
10472             .k(k)
10473             .iterations(1)
10474             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10475         }
10476       }
10477     }
10478   }
10479 
// 8x8c4__neondot ukernel, m(8) x n(8): sweeps every k in 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_gt_8)10480   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8) {
10481     TEST_REQUIRES_ARM_NEON_DOT;
10482     for (size_t k = 9; k < 16; k++) {
10483       GemmMicrokernelTester()
10484         .mr(8)
10485         .nr(8)
10486         .kr(4)
10487         .sr(1)
10488         .m(8)
10489         .n(8)
10490         .k(k)
10491         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10492     }
10493   }
10494 
// 8x8c4__neondot ukernel: sweeps k in 9..15, n in 1..8 and m in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_gt_8_subtile)10495   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8_subtile) {
10496     TEST_REQUIRES_ARM_NEON_DOT;
10497     for (size_t k = 9; k < 16; k++) {
10498       for (uint32_t n = 1; n <= 8; n++) {
10499         for (uint32_t m = 1; m <= 8; m++) {
10500           GemmMicrokernelTester()
10501             .mr(8)
10502             .nr(8)
10503             .kr(4)
10504             .sr(1)
10505             .m(m)
10506             .n(n)
10507             .k(k)
10508             .iterations(1)
10509             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10510         }
10511       }
10512     }
10513   }
10514 
// 8x8c4__neondot ukernel, m(8) x n(8): sweeps k over the multiples of 8 from 16 to 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_div_8)10515   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8) {
10516     TEST_REQUIRES_ARM_NEON_DOT;
10517     for (size_t k = 16; k <= 80; k += 8) {
10518       GemmMicrokernelTester()
10519         .mr(8)
10520         .nr(8)
10521         .kr(4)
10522         .sr(1)
10523         .m(8)
10524         .n(8)
10525         .k(k)
10526         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10527     }
10528   }
10529 
// 8x8c4__neondot ukernel: sweeps k over the multiples of 8 from 16 to 80, n in 1..8 and m in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_div_8_subtile)10530   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8_subtile) {
10531     TEST_REQUIRES_ARM_NEON_DOT;
10532     for (size_t k = 16; k <= 80; k += 8) {
10533       for (uint32_t n = 1; n <= 8; n++) {
10534         for (uint32_t m = 1; m <= 8; m++) {
10535           GemmMicrokernelTester()
10536             .mr(8)
10537             .nr(8)
10538             .kr(4)
10539             .sr(1)
10540             .m(m)
10541             .n(n)
10542             .k(k)
10543             .iterations(1)
10544             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10545         }
10546       }
10547     }
10548   }
10549 
// 8x8c4__neondot ukernel, m(8) == mr: sweeps n in 9..15 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,n_gt_8)10550   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8) {
10551     TEST_REQUIRES_ARM_NEON_DOT;
10552     for (uint32_t n = 9; n < 16; n++) {
10553       for (size_t k = 1; k <= 40; k += 9) {
10554         GemmMicrokernelTester()
10555           .mr(8)
10556           .nr(8)
10557           .kr(4)
10558           .sr(1)
10559           .m(8)
10560           .n(n)
10561           .k(k)
10562           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10563       }
10564     }
10565   }
10566 
// 8x8c4__neondot ukernel, m(8) == mr: sweeps n in 9..15 and k in {1, 10, 19, 28, 37} with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,n_gt_8_strided_cn)10567   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_strided_cn) {
10568     TEST_REQUIRES_ARM_NEON_DOT;
10569     for (uint32_t n = 9; n < 16; n++) {
10570       for (size_t k = 1; k <= 40; k += 9) {
10571         GemmMicrokernelTester()
10572           .mr(8)
10573           .nr(8)
10574           .kr(4)
10575           .sr(1)
10576           .m(8)
10577           .n(n)
10578           .k(k)
10579           .cn_stride(11)
10580           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10581       }
10582     }
10583   }
10584 
// 8x8c4__neondot ukernel: sweeps n in 9..15, k in {1, 10, 19, 28, 37} and m in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,n_gt_8_subtile)10585   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_subtile) {
10586     TEST_REQUIRES_ARM_NEON_DOT;
10587     for (uint32_t n = 9; n < 16; n++) {
10588       for (size_t k = 1; k <= 40; k += 9) {
10589         for (uint32_t m = 1; m <= 8; m++) {
10590           GemmMicrokernelTester()
10591             .mr(8)
10592             .nr(8)
10593             .kr(4)
10594             .sr(1)
10595             .m(m)
10596             .n(n)
10597             .k(k)
10598             .iterations(1)
10599             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10600         }
10601       }
10602     }
10603   }
10604 
// 8x8c4__neondot ukernel, m(8) == mr: sweeps n in {16, 24} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,n_div_8)10605   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8) {
10606     TEST_REQUIRES_ARM_NEON_DOT;
10607     for (uint32_t n = 16; n <= 24; n += 8) {
10608       for (size_t k = 1; k <= 40; k += 9) {
10609         GemmMicrokernelTester()
10610           .mr(8)
10611           .nr(8)
10612           .kr(4)
10613           .sr(1)
10614           .m(8)
10615           .n(n)
10616           .k(k)
10617           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10618       }
10619     }
10620   }
10621 
// 8x8c4__neondot ukernel, m(8) == mr: sweeps n in {16, 24} and k in {1, 10, 19, 28, 37} with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,n_div_8_strided_cn)10622   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_strided_cn) {
10623     TEST_REQUIRES_ARM_NEON_DOT;
10624     for (uint32_t n = 16; n <= 24; n += 8) {
10625       for (size_t k = 1; k <= 40; k += 9) {
10626         GemmMicrokernelTester()
10627           .mr(8)
10628           .nr(8)
10629           .kr(4)
10630           .sr(1)
10631           .m(8)
10632           .n(n)
10633           .k(k)
10634           .cn_stride(11)
10635           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10636       }
10637     }
10638   }
10639 
// 8x8c4__neondot ukernel: sweeps n in {16, 24}, k in {1, 10, 19, 28, 37} and m in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,n_div_8_subtile)10640   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_subtile) {
10641     TEST_REQUIRES_ARM_NEON_DOT;
10642     for (uint32_t n = 16; n <= 24; n += 8) {
10643       for (size_t k = 1; k <= 40; k += 9) {
10644         for (uint32_t m = 1; m <= 8; m++) {
10645           GemmMicrokernelTester()
10646             .mr(8)
10647             .nr(8)
10648             .kr(4)
10649             .sr(1)
10650             .m(m)
10651             .n(n)
10652             .k(k)
10653             .iterations(1)
10654             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10655         }
10656       }
10657     }
10658   }
10659 
// 8x8c4__neondot ukernel, m(8) x n(8): sweeps k in {1, 10, 19, 28, 37} with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,small_kernel)10660   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, small_kernel) {
10661     TEST_REQUIRES_ARM_NEON_DOT;
10662     for (size_t k = 1; k <= 40; k += 9) {
10663       GemmMicrokernelTester()
10664         .mr(8)
10665         .nr(8)
10666         .kr(4)
10667         .sr(1)
10668         .m(8)
10669         .n(8)
10670         .k(k)
10671         .ks(3)
10672         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10673     }
10674   }
10675 
// 8x8c4__neondot ukernel: sweeps k in {1, 10, 19, 28, 37}, n in 1..8 and m in 1..8 with ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,small_kernel_subtile)10676   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, small_kernel_subtile) {
10677     TEST_REQUIRES_ARM_NEON_DOT;
10678     for (size_t k = 1; k <= 40; k += 9) {
10679       for (uint32_t n = 1; n <= 8; n++) {
10680         for (uint32_t m = 1; m <= 8; m++) {
10681           GemmMicrokernelTester()
10682             .mr(8)
10683             .nr(8)
10684             .kr(4)
10685             .sr(1)
10686             .m(m)
10687             .n(n)
10688             .k(k)
10689             .ks(3)
10690             .iterations(1)
10691             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10692         }
10693       }
10694     }
10695   }
10696 
// 8x8c4__neondot ukernel, m(8) == mr: sweeps n in 9..15 and k in {1, 10, 19, 28, 37} with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,n_gt_8_small_kernel)10697   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_small_kernel) {
10698     TEST_REQUIRES_ARM_NEON_DOT;
10699     for (uint32_t n = 9; n < 16; n++) {
10700       for (size_t k = 1; k <= 40; k += 9) {
10701         GemmMicrokernelTester()
10702           .mr(8)
10703           .nr(8)
10704           .kr(4)
10705           .sr(1)
10706           .m(8)
10707           .n(n)
10708           .k(k)
10709           .ks(3)
10710           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10711       }
10712     }
10713   }
10714 
// 8x8c4__neondot ukernel, m(8) == mr: sweeps n in {16, 24} and k in {1, 10, 19, 28, 37} with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,n_div_8_small_kernel)10715   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_small_kernel) {
10716     TEST_REQUIRES_ARM_NEON_DOT;
10717     for (uint32_t n = 16; n <= 24; n += 8) {
10718       for (size_t k = 1; k <= 40; k += 9) {
10719         GemmMicrokernelTester()
10720           .mr(8)
10721           .nr(8)
10722           .kr(4)
10723           .sr(1)
10724           .m(8)
10725           .n(n)
10726           .k(k)
10727           .ks(3)
10728           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10729       }
10730     }
10731   }
10732 
// 8x8c4__neondot ukernel: sweeps k in {1, 10, 19, 28, 37}, n in 1..8 and m in 1..8 with cm_stride(11) and iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,strided_cm_subtile)10733   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm_subtile) {
10734     TEST_REQUIRES_ARM_NEON_DOT;
10735     for (size_t k = 1; k <= 40; k += 9) {
10736       for (uint32_t n = 1; n <= 8; n++) {
10737         for (uint32_t m = 1; m <= 8; m++) {
10738           GemmMicrokernelTester()
10739             .mr(8)
10740             .nr(8)
10741             .kr(4)
10742             .sr(1)
10743             .m(m)
10744             .n(n)
10745             .k(k)
10746             .cm_stride(11)
10747             .iterations(1)
10748             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10749         }
10750       }
10751     }
10752   }
10753 
// 8x8c4__neondot ukernel, m(8) x n(8): sweeps k in {1, 10, 19, 28, 37} with ks(3) and a_offset(331).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,a_offset)10754   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, a_offset) {
10755     TEST_REQUIRES_ARM_NEON_DOT;
10756     for (size_t k = 1; k <= 40; k += 9) {
10757       GemmMicrokernelTester()
10758         .mr(8)
10759         .nr(8)
10760         .kr(4)
10761         .sr(1)
10762         .m(8)
10763         .n(8)
10764         .k(k)
10765         .ks(3)
10766         .a_offset(331)
10767         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10768     }
10769   }
10770 
// 8x8c4__neondot ukernel, m(8) x n(8): sweeps k in {1, 10, 19, 28, 37} and zero_index(mz) for mz in 0..7,
// with ks(3) and a_offset(331).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,zero)10771   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, zero) {
10772     TEST_REQUIRES_ARM_NEON_DOT;
10773     for (size_t k = 1; k <= 40; k += 9) {
10774       for (uint32_t mz = 0; mz < 8; mz++) {
10775         GemmMicrokernelTester()
10776           .mr(8)
10777           .nr(8)
10778           .kr(4)
10779           .sr(1)
10780           .m(8)
10781           .n(8)
10782           .k(k)
10783           .ks(3)
10784           .a_offset(331)
10785           .zero_index(mz)
10786           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10787       }
10788     }
10789   }
10790 
// 8x8c4__neondot ukernel: single case m(8) x n(8) x k(8) with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,qmin)10791   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmin) {
10792     TEST_REQUIRES_ARM_NEON_DOT;
10793     GemmMicrokernelTester()
10794       .mr(8)
10795       .nr(8)
10796       .kr(4)
10797       .sr(1)
10798       .m(8)
10799       .n(8)
10800       .k(8)
10801       .qmin(128)
10802       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10803   }
10804 
// 8x8c4__neondot ukernel: single case m(8) x n(8) x k(8) with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,qmax)10805   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmax) {
10806     TEST_REQUIRES_ARM_NEON_DOT;
10807     GemmMicrokernelTester()
10808       .mr(8)
10809       .nr(8)
10810       .kr(4)
10811       .sr(1)
10812       .m(8)
10813       .n(8)
10814       .k(8)
10815       .qmax(128)
10816       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10817   }
10818 
// 8x8c4__neondot ukernel: single case m(8) x n(8) x k(8) with cm_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,strided_cm)10819   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm) {
10820     TEST_REQUIRES_ARM_NEON_DOT;
10821     GemmMicrokernelTester()
10822       .mr(8)
10823       .nr(8)
10824       .kr(4)
10825       .sr(1)
10826       .m(8)
10827       .n(8)
10828       .k(8)
10829       .cm_stride(11)
10830       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10831   }
10832 
// 8x8c4__neondot ukernel, m(8) x n(8): sweeps k in {1, 10, 19, 28, 37} with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,no_a_zero_point)10833   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_a_zero_point) {
10834     TEST_REQUIRES_ARM_NEON_DOT;
10835     for (size_t k = 1; k <= 40; k += 9) {
10836       GemmMicrokernelTester()
10837         .mr(8)
10838         .nr(8)
10839         .kr(4)
10840         .sr(1)
10841         .m(8)
10842         .n(8)
10843         .k(k)
10844         .a_zero_point(0)
10845         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10846     }
10847   }
10848 
// 8x8c4__neondot ukernel, m(8) x n(8): sweeps k in {1, 10, 19, 28, 37} with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,no_b_zero_point)10849   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_b_zero_point) {
10850     TEST_REQUIRES_ARM_NEON_DOT;
10851     for (size_t k = 1; k <= 40; k += 9) {
10852       GemmMicrokernelTester()
10853         .mr(8)
10854         .nr(8)
10855         .kr(4)
10856         .sr(1)
10857         .m(8)
10858         .n(8)
10859         .k(k)
10860         .b_zero_point(0)
10861         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10862     }
10863   }
10864 
// 8x8c4__neondot ukernel, m(8) x n(8): sweeps k in {1, 10, 19, 28, 37} with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,no_zero_point)10865   TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_zero_point) {
10866     TEST_REQUIRES_ARM_NEON_DOT;
10867     for (size_t k = 1; k <= 40; k += 9) {
10868       GemmMicrokernelTester()
10869         .mr(8)
10870         .nr(8)
10871         .kr(4)
10872         .sr(1)
10873         .m(8)
10874         .n(8)
10875         .k(k)
10876         .a_zero_point(0)
10877         .b_zero_point(0)
10878         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10879     }
10880   }
10881 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
10882 
10883 
10884 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// 8x16c4__neondot ukernel: single case m(8) x n(16) x k(8) with no extra tester options.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8)10885   TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8) {
10886     TEST_REQUIRES_ARM_NEON_DOT;
10887     GemmMicrokernelTester()
10888       .mr(8)
10889       .nr(16)
10890       .kr(4)
10891       .sr(1)
10892       .m(8)
10893       .n(16)
10894       .k(8)
10895       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10896   }
10897 
// 8x16c4__neondot ukernel: single case m(8) x n(16) x k(8) with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,strided_cn)10898   TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cn) {
10899     TEST_REQUIRES_ARM_NEON_DOT;
10900     GemmMicrokernelTester()
10901       .mr(8)
10902       .nr(16)
10903       .kr(4)
10904       .sr(1)
10905       .m(8)
10906       .n(16)
10907       .k(8)
10908       .cn_stride(19)
10909       .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10910   }
10911 
// 8x16c4__neondot ukernel, k(8): sweeps every n in 1..16 and m in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8_subtile)10912   TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile) {
10913     TEST_REQUIRES_ARM_NEON_DOT;
10914     for (uint32_t n = 1; n <= 16; n++) {
10915       for (uint32_t m = 1; m <= 8; m++) {
10916         GemmMicrokernelTester()
10917           .mr(8)
10918           .nr(16)
10919           .kr(4)
10920           .sr(1)
10921           .m(m)
10922           .n(n)
10923           .k(8)
10924           .iterations(1)
10925           .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10926       }
10927     }
10928   }
10929 
// 8x16c4__neondot ukernel, n(16) x k(8): sweeps m in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8_subtile_m)10930   TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_m) {
10931     TEST_REQUIRES_ARM_NEON_DOT;
10932     for (uint32_t m = 1; m <= 8; m++) {
10933       GemmMicrokernelTester()
10934         .mr(8)
10935         .nr(16)
10936         .kr(4)
10937         .sr(1)
10938         .m(m)
10939         .n(16)
10940         .k(8)
10941         .iterations(1)
10942         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10943     }
10944   }
10945 
// 8x16c4__neondot ukernel, m(8) x k(8): sweeps n in 1..16 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8_subtile_n)10946   TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_n) {
10947     TEST_REQUIRES_ARM_NEON_DOT;
10948     for (uint32_t n = 1; n <= 16; n++) {
10949       GemmMicrokernelTester()
10950         .mr(8)
10951         .nr(16)
10952         .kr(4)
10953         .sr(1)
10954         .m(8)
10955         .n(n)
10956         .k(8)
10957         .iterations(1)
10958         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10959     }
10960   }
10961 
// 8x16c4__neondot ukernel, m(8) x n(16): sweeps every k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_lt_8)10962   TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8) {
10963     TEST_REQUIRES_ARM_NEON_DOT;
10964     for (size_t k = 1; k < 8; k++) {
10965       GemmMicrokernelTester()
10966         .mr(8)
10967         .nr(16)
10968         .kr(4)
10969         .sr(1)
10970         .m(8)
10971         .n(16)
10972         .k(k)
10973         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10974     }
10975   }
10976 
// 8x16c4__neondot ukernel: sweeps k in 1..7, n in 1..16 and m in 1..8 with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_lt_8_subtile)10977   TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8_subtile) {
10978     TEST_REQUIRES_ARM_NEON_DOT;
10979     for (size_t k = 1; k < 8; k++) {
10980       for (uint32_t n = 1; n <= 16; n++) {
10981         for (uint32_t m = 1; m <= 8; m++) {
10982           GemmMicrokernelTester()
10983             .mr(8)
10984             .nr(16)
10985             .kr(4)
10986             .sr(1)
10987             .m(m)
10988             .n(n)
10989             .k(k)
10990             .iterations(1)
10991             .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
10992         }
10993       }
10994     }
10995   }
10996 
// 8x16c4__neondot ukernel, m(8) x n(16): sweeps every k in 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_gt_8)10997   TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8) {
10998     TEST_REQUIRES_ARM_NEON_DOT;
10999     for (size_t k = 9; k < 16; k++) {
11000       GemmMicrokernelTester()
11001         .mr(8)
11002         .nr(16)
11003         .kr(4)
11004         .sr(1)
11005         .m(8)
11006         .n(16)
11007         .k(k)
11008         .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
11009     }
11010   }
11011 
// Every (m, n) with m in [1, 8] and n in [1, 16], for each k in [9, 15];
// the tester is configured with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 8; ++mc) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
11031 
// Full tile (m=8, n=16) for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
11046 
// Every (m, n) with m in [1, 8] and n in [1, 16], for k = 16, 24, ..., 80;
// the tester is configured with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 8; ++mc) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
11066 
// m=8 with n in [17, 31], each combined with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
11083 
// m=8 with n in [17, 31] and k in {1, 10, 19, 28, 37}, using cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
11101 
// n in [17, 31], k in {1, 10, 19, 28, 37}, and every m in [1, 8];
// the tester is configured with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 8; ++mc) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
11121 
// m=8 with n in {32, 48}, each combined with k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
11138 
// m=8 with n in {32, 48} and k in {1, 10, 19, 28, 37}, using cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
11156 
// n in {32, 48}, k in {1, 10, 19, 28, 37}, and every m in [1, 8];
// the tester is configured with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 8; ++mc) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
11176 
// Full tile (m=8, n=16) with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
11192 
// Every (m, n) with m in [1, 8] and n in [1, 16], with ks(3), for
// k in {1, 10, 19, 28, 37}; the tester is configured with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 8; ++mc) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
11213 
// m=8 with n in [17, 31] and k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
11231 
// m=8 with n in {32, 48} and k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
11249 
// Every (m, n) with m in [1, 8] and n in [1, 16], with cm_stride(19), for
// k in {1, 10, 19, 28, 37}; the tester is configured with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 8; ++mc) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
11270 
// Full tile (m=8, n=16) with ks(3) and a_offset(331), for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(kc)
      .ks(3)
      .a_offset(331)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
11287 
// Full tile (m=8, n=16) with ks(3) and a_offset(331); for each k in
// {1, 10, 19, 28, 37} the zero_index is swept over [0, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 8; ++zero_row) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(16).k(kc)
        .ks(3)
        .a_offset(331)
        .zero_index(zero_row)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
11307 
// Single full-tile run (m=8, n=16, k=8) with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(4).sr(1)
    .m(8).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
11321 
// Single full-tile run (m=8, n=16, k=8) with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(4).sr(1)
    .m(8).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
11335 
// Single full-tile run (m=8, n=16, k=8) with cm_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8).nr(16).kr(4).sr(1)
    .m(8).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
11349 
// Full tile (m=8, n=16) with a_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
11365 
// Full tile (m=8, n=16) with b_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
11381 
// Full tile (m=8, n=16) with both a_zero_point(0) and b_zero_point(0), for
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
11398 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
11399