// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qu8-igemm-minmax-rndnu.yaml
//   Generator: tools/generate-gemm-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 
20 #include <xnnpack/gemm.h>
21 #include <xnnpack/igemm.h>
22 #include <xnnpack/ppmm.h>
23 #include "gemm-microkernel-tester.h"
24 
25 
26 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 4x8 tile (m == mr, n == nr) with k == 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
39 
// Full 4x8 tile with k == 8 and cn_stride set to 11 (larger than nr).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
53 
// Sweeps every m in [1, 4] and n in [1, 8] at k == 8, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
71 
// Sweeps m in [1, 4] with n == 8 and k == 8, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
87 
// Sweeps n in [1, 8] with m == 4 and k == 8, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
103 
// Full 4x8 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
118 
// Every k in [1, 7] crossed with every m in [1, 4] and n in [1, 8], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
138 
// Full 4x8 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
153 
// Every k in [9, 15] crossed with every m in [1, 4] and n in [1, 8], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
173 
// Full 4x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
188 
// k = 16, 24, ..., 80 crossed with every m in [1, 4] and n in [1, 8], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
208 
// n in [9, 15] (above nr) with m == 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
225 
// n in [9, 15] with m == 4 and cn_stride set to 11, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
243 
// n in [9, 15] crossed with k = 1, 10, 19, 28, 37 and every m in [1, 4], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
263 
// n = 16 and 24 (multiples of nr) with m == 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
280 
// n = 16 and 24 with m == 4 and cn_stride set to 11, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
298 
// n = 16 and 24 crossed with k = 1, 10, 19, 28, 37 and every m in [1, 4], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
318 
// Full 4x8 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
334 
// ks set to 3; k = 1, 10, 19, 28, 37 crossed with every m in [1, 4] and n in [1, 8], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
355 
// n in [9, 15] with m == 4 and ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
373 
// n = 16 and 24 with m == 4 and ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
391 
// cm_stride set to 11; k = 1, 10, 19, 28, 37 crossed with every m in [1, 4] and n in [1, 8], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
412 
// Full 4x8 tile with ks set to 3 and a_offset set to 163, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
429 
// Same setup as a_offset (ks 3, a_offset 163), additionally sweeping zero_index over [0, 3].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
449 
// Full 4x8 tile with k == 8 and qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
463 
// Full 4x8 tile with k == 8 and qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
477 
// Full 4x8 tile with k == 8 and cm_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
491 
// Full 4x8 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
507 
// Full 4x8 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
523 
// Full 4x8 tile with both a_zero_point and b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
540 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
541 
542 
543 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Full 4x8 tile (m == mr, n == nr) with k == 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
556 
// Full 4x8 tile with k == 8 and cn_stride set to 11 (larger than nr).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
570 
// Sweeps every m in [1, 4] and n in [1, 8] at k == 8, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
588 
// Sweeps m in [1, 4] with n == 8 and k == 8, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
604 
// Sweeps n in [1, 8] with m == 4 and k == 8, one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
620 
// Full 4x8 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
635 
// Every k in [1, 7] crossed with every m in [1, 4] and n in [1, 8], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
655 
// Full 4x8 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
670 
// Every k in [9, 15] crossed with every m in [1, 4] and n in [1, 8], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
690 
// Full 4x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
705 
// k = 16, 24, ..., 80 crossed with every m in [1, 4] and n in [1, 8], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
725 
// n in [9, 15] (above nr) with m == 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
742 
// n in [9, 15] with m == 4 and cn_stride set to 11, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
760 
// n in [9, 15] crossed with k = 1, 10, 19, 28, 37 and every m in [1, 4], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
780 
// n = 16 and 24 (multiples of nr) with m == 4, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
797 
// n = 16 and 24 with m == 4 and cn_stride set to 11, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
815 
// n = 16 and 24 crossed with k = 1, 10, 19, 28, 37 and every m in [1, 4], one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
835 
// Full 4x8 tile with ks(3), sweeping k over 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
851 
// With ks(3), covers every sub-tile m in 1..4 by n in 1..8 for
// k = 1, 10, 19, 28, 37, one iteration (iterations(1)) per configuration.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
872 
// With ks(3) and m = 4, sweeps n over 9..15 (just above nr = 8) and
// k over 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
890 
// With ks(3) and m = 4, sweeps n = 16, 24 (multiples of nr = 8) and
// k over 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
908 
// With cm_stride(11), covers every sub-tile m in 1..4 by n in 1..8 for
// k = 1, 10, 19, 28, 37, one iteration (iterations(1)) per configuration.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
929 
// Full 4x8 tile with ks(3) and a_offset(163), sweeping k over
// 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
946 
// Full 4x8 tile with ks(3) and a_offset(163); for each k in
// 1, 10, 19, 28, 37 the tester's zero_index is set to every mz in 0..3.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
966 
// Single full 4x8 tile at k = 8 with qmin(128) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
980 
// Single full 4x8 tile at k = 8 with qmax(128) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
994 
// Single full 4x8 tile at k = 8 with cm_stride(11) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
1008 
// Full 4x8 tile with a_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1024 
// Full 4x8 tile with b_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1040 
// Full 4x8 tile with both a_zero_point(0) and b_zero_point(0), sweeping k
// over 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1057 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1058 
1059 
1060 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case for the 2x8 NEON MLAL-lane kernel: one full tile
// (m = 2, n = 8) at k = 8. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
1073 
// One full 2x8 tile at k = 8 with cn_stride(11) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
1087 
// At k = 8, covers every sub-tile m in 1..2 by n in 1..8, one iteration
// (iterations(1)) per configuration. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1105 
// At k = 8 and full width n = 8, varies only the row count m over 1..2,
// one iteration (iterations(1)) each. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1121 
// At k = 8 and full height m = 2, varies only the column count n over 1..8,
// one iteration (iterations(1)) each. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1137 
// Full 2x8 tile for every k in 1..7 (below 8).
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1152 
// For every k in 1..7, covers every sub-tile m in 1..2 by n in 1..8,
// one iteration (iterations(1)) per configuration.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1172 
// Full 2x8 tile for every k in 9..15 (above 8, below 16).
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1187 
// For every k in 9..15, covers every sub-tile m in 1..2 by n in 1..8,
// one iteration (iterations(1)) per configuration.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1207 
// Full 2x8 tile for k = 16, 24, ..., 80 (multiples of 8).
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1222 
// For k = 16, 24, ..., 80, covers every sub-tile m in 1..2 by n in 1..8,
// one iteration (iterations(1)) per configuration.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1242 
// With m = 2, sweeps n over 9..15 (just above nr = 8) and k over
// 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1259 
// Same sweep as n_gt_8 (n in 9..15; k = 1, 10, 19, 28, 37; m = 2) but with
// cn_stride(11) set on the tester. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1277 
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 for every row count m in
// 1..2, one iteration (iterations(1)) per configuration.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1297 
// With m = 2, sweeps n = 16, 24 (multiples of nr = 8) and k over
// 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1314 
// Same sweep as n_div_8 (n = 16, 24; k = 1, 10, 19, 28, 37; m = 2) but with
// cn_stride(11) set on the tester. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1332 
// Sweeps n = 16, 24 and k = 1, 10, 19, 28, 37 for every row count m in 1..2,
// one iteration (iterations(1)) per configuration.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1352 
// Full 2x8 tile with ks(3), sweeping k over 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1368 
// With ks(3), covers every sub-tile m in 1..2 by n in 1..8 for
// k = 1, 10, 19, 28, 37, one iteration (iterations(1)) per configuration.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1389 
// With ks(3) and m = 2, sweeps n over 9..15 (just above nr = 8) and
// k over 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1407 
// With ks(3) and m = 2, sweeps n = 16, 24 (multiples of nr = 8) and
// k over 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1425 
// With cm_stride(11), covers every sub-tile m in 1..2 by n in 1..8 for
// k = 1, 10, 19, 28, 37, one iteration (iterations(1)) per configuration.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1446 
// Full 2x8 tile with ks(3) and a_offset(83), sweeping k over
// 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1463 
// Full 2x8 tile with ks(3) and a_offset(83); for each k in
// 1, 10, 19, 28, 37 the tester's zero_index is set to every mz in 0..1.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1483 
// Single full 2x8 tile at k = 8 with qmin(128) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
1497 
// Single full 2x8 tile at k = 8 with qmax(128) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
1511 
// Single full 2x8 tile at k = 8 with cm_stride(11) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
1525 
// Full 2x8 tile with a_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1541 
// Full 2x8 tile with b_zero_point(0), sweeping k over 1, 10, 19, 28, 37.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1557 
// Full 2x8 tile with both a_zero_point(0) and b_zero_point(0), sweeping k
// over 1, 10, 19, 28, 37. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1574 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1575 
1576 
1577 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case for the 3x8 NEON MLAL-lane kernel: one full tile
// (m = 3, n = 8) at k = 8. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
1590 
// One full 3x8 tile at k = 8 with cn_stride(11) set on the tester.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
1604 
// At k = 8, covers every sub-tile m in 1..3 by n in 1..8, one iteration
// (iterations(1)) per configuration. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1622 
// At k = 8 and full width n = 8, varies only the row count m over 1..3,
// one iteration (iterations(1)) each. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1638 
// At k = 8 and full height m = 3, varies only the column count n over 1..8,
// one iteration (iterations(1)) each. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1654 
// Full 3x8 tile for every k in 1..7 (below 8).
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1669 
// For every k in 1..7, covers every sub-tile m in 1..3 by n in 1..8,
// one iteration (iterations(1)) per configuration.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1689 
// Full 3x8 tile for every k in 9..15 (above 8, below 16).
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
1704 
// Runs the 3x8 kernel for every (k, n, m) with k in [9, 15], n in [1, 8] and
// m in [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1724 
// Runs the 3x8 kernel at m = 3, n = 8 for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
1739 
// Runs the 3x8 kernel for k = 16, 24, ..., 80 combined with every n in [1, 8]
// and m in [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1759 
// Runs the 3x8 kernel at m = 3 for every n in [9, 15], with k stepping
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
1776 
// Same sweep as n in [9, 15] x k in {1, 10, 19, 28, 37} at m = 3, but with
// cn_stride(11) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
1794 
// Runs the 3x8 kernel for n in [9, 15], k in {1, 10, 19, 28, 37} and every
// m in [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1814 
// Runs the 3x8 kernel at m = 3 for n = 16 and n = 24, with k stepping
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
1831 
// Runs the 3x8 kernel at m = 3 for n = 16 and n = 24, k in
// {1, 10, 19, 28, 37}, with cn_stride(11) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
1849 
// Runs the 3x8 kernel for n = 16 and n = 24, k in {1, 10, 19, 28, 37} and
// every m in [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1869 
// Runs the 3x8 kernel at m = 3, n = 8 with ks(3), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
1885 
// Runs the 3x8 kernel with ks(3) for k in {1, 10, 19, 28, 37}, every n in
// [1, 8] and every m in [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1906 
// Runs the 3x8 kernel at m = 3 with ks(3), for every n in [9, 15] and k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
1924 
// Runs the 3x8 kernel at m = 3 with ks(3), for n = 16 and n = 24 and k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
1942 
// Runs the 3x8 kernel with cm_stride(11) for k in {1, 10, 19, 28, 37}, every
// n in [1, 8] and every m in [1, 3], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
1963 
// Runs the 3x8 kernel at m = 3, n = 8 with ks(3) and a_offset(127), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_dim)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
1980 
// Runs the 3x8 kernel at m = 3, n = 8 with ks(3) and a_offset(127), trying
// each zero_index in [0, 2] for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(8).k(k_dim)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
2000 
// Runs the 3x8 kernel once at m = 3, n = 8, k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
}
2014 
// Runs the 3x8 kernel once at m = 3, n = 8, k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
}
2028 
// Runs the 3x8 kernel once at m = 3, n = 8, k = 8 with cm_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
}
2042 
// Runs the 3x8 kernel at m = 3, n = 8 with a_zero_point(0), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_dim)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2058 
// Runs the 3x8 kernel at m = 3, n = 8 with b_zero_point(0), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_dim)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2074 
// Runs the 3x8 kernel at m = 3, n = 8 with both a_zero_point(0) and
// b_zero_point(0), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_dim)
        .a_zero_point(0)
        .b_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2091 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2092 
2093 
2094 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 6x8 kernel once at m = 6, n = 8, k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
}
2107 
// Runs the 6x8 kernel once at m = 6, n = 8, k = 8 with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
}
2121 
// Runs the 6x8 kernel at k = 8 for every n in [1, 8] and m in [1, 6], using
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
2139 
// Runs the 6x8 kernel at n = 8, k = 8 for every m in [1, 6], using
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2155 
// Runs the 6x8 kernel at m = 6, k = 8 for every n in [1, 8], using
// iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2171 
// Runs the 6x8 kernel at m = 6, n = 8 for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2186 
// Runs the 6x8 kernel for every (k, n, m) with k in [1, 7], n in [1, 8] and
// m in [1, 6], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2206 
// Runs the 6x8 kernel at m = 6, n = 8 for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2221 
// Runs the 6x8 kernel for every (k, n, m) with k in [9, 15], n in [1, 8] and
// m in [1, 6], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2241 
// Runs the 6x8 kernel at m = 6, n = 8 for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2256 
// Runs the 6x8 kernel for k = 16, 24, ..., 80 combined with every n in [1, 8]
// and m in [1, 6], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2276 
// Runs the 6x8 kernel at m = 6 for every n in [9, 15], with k stepping
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
2293 
// Runs the 6x8 kernel at m = 6 for every n in [9, 15] and k in
// {1, 10, 19, 28, 37}, with cn_stride(11) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
2311 
// Runs the 6x8 kernel for n in [9, 15], k in {1, 10, 19, 28, 37} and every
// m in [1, 6], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2331 
// Runs the 6x8 kernel at m = 6 for n = 16 and n = 24, with k stepping
// 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
2348 
// Runs the 6x8 kernel at m = 6 for n = 16 and n = 24, k in
// {1, 10, 19, 28, 37}, with cn_stride(11) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
2366 
// Runs the 6x8 kernel for n = 16 and n = 24, k in {1, 10, 19, 28, 37} and
// every m in [1, 6], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2386 
// Runs the 6x8 kernel at m = 6, n = 8 with ks(3), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2402 
// Runs the 6x8 kernel with ks(3) for k in {1, 10, 19, 28, 37}, every n in
// [1, 8] and every m in [1, 6], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2423 
// Runs the 6x8 kernel at m = 6 with ks(3), for every n in [9, 15] and k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
2441 
// Runs the 6x8 kernel at m = 6 with ks(3), for n = 16 and n = 24 and k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
2459 
// Runs the 6x8 kernel with cm_stride(11) for k in {1, 10, 19, 28, 37}, every
// n in [1, 8] and every m in [1, 6], using iterations(1) per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2480 
// Runs the 6x8 kernel at m = 6, n = 8 with ks(3) and a_offset(251), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .a_offset(251)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2497 
// Runs the 6x8 kernel at m = 6, n = 8 with ks(3) and a_offset(251), trying
// each zero_index in [0, 5] for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(k_dim)
          .ks(3)
          .a_offset(251)
          .zero_index(zero_idx)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
    }
  }
}
2517 
// Runs the 6x8 kernel once at m = 6, n = 8, k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .qmin(128)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
}
2531 
// Runs the 6x8 kernel once at m = 6, n = 8, k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .qmax(128)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
}
2545 
// Runs the 6x8 kernel once at m = 6, n = 8, k = 8 with cm_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
}
2559 
// Runs the 6x8 kernel at m = 6, n = 8 with a_zero_point(0), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .a_zero_point(0)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
2575 
// Full 6x8 tile with b_zero_point(0); k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2591 
// Full 6x8 tile with both a_zero_point(0) and b_zero_point(0);
// k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2608 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2609 
2610 
2611 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 4x16 tile (m=4, n=16) with k fixed at 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}
2624 
// Full 4x16 tile (m=4, n=16, k=8) with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}
2638 
// k fixed at 8; every (n, m) pair with n in 1..16 and m in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2656 
// k fixed at 8 and n at 16; m runs over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2672 
// k fixed at 8 and m at 4; n runs over 1..16, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2688 
// Full 4x16 tile with k running over 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2703 
// k over 1..7, n over 1..16, m over 1..4; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2723 
// Full 4x16 tile with k running over 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2738 
// k over 9..15, n over 1..16, m over 1..4; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2758 
// Full 4x16 tile with k stepping through 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2773 
// k stepping through 16, 24, ..., 80; n over 1..16; m over 1..4;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2793 
// m fixed at 4; n over 17..31; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2810 
// m fixed at 4; n over 17..31; k in {1, 10, 19, 28, 37}; cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2828 
// n over 17..31; k in {1, 10, 19, 28, 37}; m over 1..4;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2848 
// m fixed at 4; n in {32, 48}; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2865 
// m fixed at 4; n in {32, 48}; k in {1, 10, 19, 28, 37}; cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2883 
// n in {32, 48}; k in {1, 10, 19, 28, 37}; m over 1..4;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2903 
// Full 4x16 tile with ks(3); k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
2919 
// ks(3) with k in {1, 10, 19, 28, 37}, n over 1..16 and m over 1..4;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2940 
// ks(3) with m fixed at 4; n over 17..31; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2958 
// ks(3) with m fixed at 4; n in {32, 48}; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
2976 
// cm_stride(19) with k in {1, 10, 19, 28, 37}, n over 1..16 and m over 1..4;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
2997 
// Full 4x16 tile with ks(3) and a_offset(163); k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
3014 
// Full 4x16 tile with ks(3) and a_offset(163); k in {1, 10, 19, 28, 37}
// and zero_index set to each of 0..3 in turn.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
3034 
// Full 4x16 tile (m=4, n=16, k=8) with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}
3048 
// Full 4x16 tile (m=4, n=16, k=8) with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}
3062 
// Full 4x16 tile (m=4, n=16, k=8) with cm_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}
3076 
// Full 4x16 tile with a_zero_point(0); k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
3092 
// Full 4x16 tile with b_zero_point(0); k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
3108 
// Full 4x16 tile with both a_zero_point(0) and b_zero_point(0);
// k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
3125 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3126 
3127 
3128 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 6x16 tile (m=6, n=16) with k fixed at 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}
3141 
// Full 6x16 tile (m=6, n=16, k=8) with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}
3155 
// k fixed at 8; every (n, m) pair with n in 1..16 and m in 1..6,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
3173 
// k fixed at 8 and n at 16; m runs over 1..6, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
3189 
// k fixed at 8 and m at 6; n runs over 1..16, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
3205 
// Full 6x16 tile with k running over 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
3220 
// k over 1..7, n over 1..16, m over 1..6; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3240 
// Full 6x16 tile with k running over 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
3255 
// k over 9..15, n over 1..16, m over 1..6; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3275 
// Full 6x16 tile with k stepping through 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_size)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
3290 
// k stepping through 16, 24, ..., 80; n over 1..16; m over 1..6;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3310 
// m fixed at 6; n over 17..31; k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
3327 
// m fixed at 6; n over 17..31; k in {1, 10, 19, 28, 37}; cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
3345 
// n over 17..31; k in {1, 10, 19, 28, 37}; m over 1..6;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3365 
// m fixed at 6; n in {32, 48}; k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
3382 
// m fixed at 6; n in {32, 48}; k in {1, 10, 19, 28, 37}; cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}
3400 
// n in {32, 48}; k in {1, 10, 19, 28, 37}; m over 1..6;
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3420 
// Full 6x16 tile with ks(3); k takes the values 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_size)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}
3436 
// 6x16 NEON MLAL-lane kernel: k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..6,
// ks(3), a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3457 
// 6x16 NEON MLAL-lane kernel: n in 17..31, k in {1, 10, 19, 28, 37},
// m fixed at 6, ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(6).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
3475 
// 6x16 NEON MLAL-lane kernel: n in {32, 48}, k in {1, 10, 19, 28, 37},
// m fixed at 6, ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(6).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
3493 
// 6x16 NEON MLAL-lane kernel: k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..6,
// cm_stride(19), a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(19).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3514 
// 6x16 NEON MLAL-lane kernel: full 6x16 tile, k in {1, 10, 19, 28, 37},
// ks(3) and a_offset(251).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(6).n(16).k(depth);
    tester.ks(3).a_offset(251);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3531 
// 6x16 NEON MLAL-lane kernel: full 6x16 tile, k in {1, 10, 19, 28, 37},
// ks(3), a_offset(251), and zero_index swept over 0..5.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 6; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(6).n(16).k(depth);
      tester.ks(3).a_offset(251).zero_index(zero_row);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
3551 
// 6x16 NEON MLAL-lane kernel: full 6x16 tile, k = 8, with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(16).kr(1).sr(1);
  tester.m(6).n(16).k(8);
  tester.qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
3565 
// 6x16 NEON MLAL-lane kernel: full 6x16 tile, k = 8, with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(16).kr(1).sr(1);
  tester.m(6).n(16).k(8);
  tester.qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
3579 
// 6x16 NEON MLAL-lane kernel: full 6x16 tile, k = 8, with cm_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(16).kr(1).sr(1);
  tester.m(6).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
3593 
// 6x16 NEON MLAL-lane kernel: full 6x16 tile, k in {1, 10, 19, 28, 37},
// with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(6).n(16).k(depth);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3609 
// 6x16 NEON MLAL-lane kernel: full 6x16 tile, k in {1, 10, 19, 28, 37},
// with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(6).n(16).k(depth);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3625 
// 6x16 NEON MLAL-lane kernel: full 6x16 tile, k in {1, 10, 19, 28, 37},
// with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(6).n(16).k(depth);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3642 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3643 
3644 
3645 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile with k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
3658 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile, k = 8, cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
3672 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: k = 8, n in 1..8, m in 1..4,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
3690 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: k = 8, n = 8, m in 1..4,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3706 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: k = 8, m = 4, n in 1..8,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(cols).k(8);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3722 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile, k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3737 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: k in 1..7, n in 1..8, m in 1..4,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3757 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile, k in 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3772 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: k in 9..15, n in 1..8, m in 1..4,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3792 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile, k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3807 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: k = 16, 24, ..., 80, n in 1..8,
// m in 1..4, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3827 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: n in 9..15,
// k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
3844 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: n in 9..15,
// k in {1, 10, 19, 28, 37}, m fixed at 4, with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
3862 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: n in 9..15,
// k in {1, 10, 19, 28, 37}, m in 1..4, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3882 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: n in {16, 24},
// k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
3899 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: n in {16, 24},
// k in {1, 10, 19, 28, 37}, m fixed at 4, with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
3917 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: n in {16, 24},
// k in {1, 10, 19, 28, 37}, m in 1..4, a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3937 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile,
// k in {1, 10, 19, 28, 37}, ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.ks(3);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
3953 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: k in {1, 10, 19, 28, 37},
// n in 1..8, m in 1..4, ks(3), a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
3974 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: n in 9..15,
// k in {1, 10, 19, 28, 37}, m fixed at 4, ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
3992 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: n in {16, 24},
// k in {1, 10, 19, 28, 37}, m fixed at 4, ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4010 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: k in {1, 10, 19, 28, 37},
// n in 1..8, m in 1..4, cm_stride(11), a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4031 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile,
// k in {1, 10, 19, 28, 37}, ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4048 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile,
// k in {1, 10, 19, 28, 37}, ks(3), a_offset(163), zero_index swept over 0..3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(8).k(depth);
      tester.ks(3).a_offset(163).zero_index(zero_row);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4068 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile, k = 8, qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmin(128);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
4082 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile, k = 8, qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmax(128);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
4096 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile, k = 8, cm_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
4110 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile,
// k in {1, 10, 19, 28, 37}, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.a_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4126 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile,
// k in {1, 10, 19, 28, 37}, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4142 
// 4x8c4 AArch64 NEON-dot Cortex-A55 kernel: full 4x8 tile,
// k in {1, 10, 19, 28, 37}, with both a_zero_point(0) and b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4159 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4160 
4161 
4162 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// 4x8c4 AArch64 NEON-dot LD128 kernel: full 4x8 tile with k = 16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(16);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
4175 
// 4x8c4 AArch64 NEON-dot LD128 kernel: full 4x8 tile, k = 16, cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
4189 
// 4x8c4 AArch64 NEON-dot LD128 kernel: k = 16, n in 1..8, m in 1..4,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(rows).n(cols).k(16);
      tester.iterations(1);
      tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4207 
// 4x8c4 AArch64 NEON-dot LD128 kernel: k = 16, n = 8, m in 1..4,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(rows).n(8).k(16);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4223 
// 4x8c4 AArch64 NEON-dot LD128 kernel: k = 16, m = 4, n in 1..8,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(cols).k(16);
    tester.iterations(1);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4239 
// 4x8c4 AArch64 NEON-dot LD128 kernel: full 4x8 tile, k in 1..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4254 
// 4x8c4 AArch64 NEON-dot LD128 kernel: k in 1..15, n in 1..8, m in 1..4,
// a single iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4274 
// Runs the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel at m = 4, n = 8 for k = 17..31,
// i.e. every k strictly between 16 and 32.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4289 
// Covers every (k, n, m) combination with k = 17..31, n = 1..8 and m = 1..4 for the 4x8c4
// AArch64 NEON-dot LD128 IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4309 
// Runs the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel at m = 4, n = 8 for the multiples
// of 16 from k = 32 up to and including k = 160.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4324 
// Covers every (k, n, m) combination with k = 32, 48, ..., 160, n = 1..8 and m = 1..4 for
// the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel; iterations is set to 1 for each one.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4344 
// Runs the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel with n = 9..15 (above nr = 8) at
// m = 4, for k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4361 
// Same sweep as n = 9..15, k in {1, 18, 35, 52, 69} at m = 4 for the 4x8c4 AArch64
// NEON-dot LD128 IGEMM ukernel, but with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4379 
// Covers n = 9..15, k in {1, 18, 35, 52, 69} and m = 1..4 for the 4x8c4 AArch64 NEON-dot
// LD128 IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4399 
// Runs the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel with n = 16 and n = 24 (multiples
// of nr = 8) at m = 4, for k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4416 
// Runs n = 16 and n = 24 at m = 4 with k in {1, 18, 35, 52, 69} for the 4x8c4 AArch64
// NEON-dot LD128 IGEMM ukernel, with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4434 
// Covers n in {16, 24}, k in {1, 18, 35, 52, 69} and m = 1..4 for the 4x8c4 AArch64
// NEON-dot LD128 IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4454 
// Runs the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel with ks set to 3 at m = 4, n = 8,
// for k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4470 
// Covers k in {1, 18, 35, 52, 69}, n = 1..8 and m = 1..4 with ks set to 3 for the 4x8c4
// AArch64 NEON-dot LD128 IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4491 
// Combines n = 9..15 with ks set to 3 at m = 4, for k in {1, 18, 35, 52, 69}, on the
// 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4509 
// Combines n in {16, 24} with ks set to 3 at m = 4, for k in {1, 18, 35, 52, 69}, on the
// 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4527 
// Covers k in {1, 18, 35, 52, 69}, n = 1..8 and m = 1..4 with cm_stride set to 11 for the
// 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4548 
// Runs the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel with ks set to 3 and a_offset set
// to 331 at m = 4, n = 8, for k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4565 
// Sweeps zero_index over 0..3 with ks set to 3 and a_offset set to 331 at m = 4, n = 8,
// for k in {1, 18, 35, 52, 69}, on the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(331)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4585 
// Single run of the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel at m = 4, n = 8, k = 16
// with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
4599 
// Single run of the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel at m = 4, n = 8, k = 16
// with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
4613 
// Single run of the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel at m = 4, n = 8, k = 16
// with cm_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
4627 
// Runs the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel with a_zero_point set to 0 at
// m = 4, n = 8, for k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4643 
// Runs the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel with b_zero_point set to 0 at
// m = 4, n = 8, for k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4659 
// Runs the 4x8c4 AArch64 NEON-dot LD128 IGEMM ukernel with both a_zero_point and
// b_zero_point set to 0 at m = 4, n = 8, for k in {1, 18, 35, 52, 69}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4676 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4677 
4678 
4679 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
// Single run of the 2x8c4 NEON-dot IGEMM ukernel at m = 2, n = 8, k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
4692 
// Single run of the 2x8c4 NEON-dot IGEMM ukernel at m = 2, n = 8, k = 8 with cn_stride
// set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
4706 
// Covers every (n, m) pair with n = 1..8 and m = 1..2 at k = 8 for the 2x8c4 NEON-dot
// IGEMM ukernel; iterations is set to 1 for each pair.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4724 
// Sweeps m over 1..2 for the 2x8c4 NEON-dot IGEMM ukernel while n = 8 and k = 8 stay
// fixed; every configuration is run with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4740 
// Sweeps n over 1..8 for the 2x8c4 NEON-dot IGEMM ukernel while m = 2 and k = 8 stay
// fixed; every configuration is run with iterations set to 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4756 
// Runs the 2x8c4 NEON-dot IGEMM ukernel at m = 2, n = 8 for every k below 8 (k = 1..7).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4771 
// Covers every (k, n, m) combination with k = 1..7, n = 1..8 and m = 1..2 for the 2x8c4
// NEON-dot IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4791 
// Runs the 2x8c4 NEON-dot IGEMM ukernel at m = 2, n = 8 for k = 9..15, i.e. every k
// strictly between 8 and 16.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4806 
// Covers every (k, n, m) combination with k = 9..15, n = 1..8 and m = 1..2 for the 2x8c4
// NEON-dot IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4826 
// Runs the 2x8c4 NEON-dot IGEMM ukernel at m = 2, n = 8 for the multiples of 8 from
// k = 16 up to and including k = 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4841 
// Covers every (k, n, m) combination with k = 16, 24, ..., 80, n = 1..8 and m = 1..2 for
// the 2x8c4 NEON-dot IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4861 
// Runs the 2x8c4 NEON-dot IGEMM ukernel with n = 9..15 (above nr = 8) at m = 2, for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4878 
// Runs n = 9..15 at m = 2 with k in {1, 10, 19, 28, 37} for the 2x8c4 NEON-dot IGEMM
// ukernel, with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4896 
// Covers n = 9..15, k in {1, 10, 19, 28, 37} and m = 1..2 for the 2x8c4 NEON-dot IGEMM
// ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4916 
// Runs the 2x8c4 NEON-dot IGEMM ukernel with n = 16 and n = 24 (multiples of nr = 8) at
// m = 2, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4933 
// Runs n = 16 and n = 24 at m = 2 with k in {1, 10, 19, 28, 37} for the 2x8c4 NEON-dot
// IGEMM ukernel, with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
4951 
// Covers n in {16, 24}, k in {1, 10, 19, 28, 37} and m = 1..2 for the 2x8c4 NEON-dot
// IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
4971 
// Runs the 2x8c4 NEON-dot IGEMM ukernel with ks set to 3 at m = 2, n = 8, for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
4987 
// Covers k in {1, 10, 19, 28, 37}, n = 1..8 and m = 1..2 with ks set to 3 for the 2x8c4
// NEON-dot IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5008 
// Combines n = 9..15 with ks set to 3 at m = 2, for k in {1, 10, 19, 28, 37}, on the
// 2x8c4 NEON-dot IGEMM ukernel.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
5026 
// Combines n in {16, 24} with ks set to 3 at m = 2, for k in {1, 10, 19, 28, 37}, on the
// 2x8c4 NEON-dot IGEMM ukernel.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
5044 
// Covers k in {1, 10, 19, 28, 37}, n = 1..8 and m = 1..2 with cm_stride set to 11 for the
// 2x8c4 NEON-dot IGEMM ukernel; iterations is set to 1 for each combination.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5065 
// Runs the 2x8c4 NEON-dot IGEMM ukernel with ks set to 3 and a_offset set to 83 at
// m = 2, n = 8, for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
5082 
// Sweeps zero_index over 0..1 with ks set to 3 and a_offset set to 83 at m = 2, n = 8,
// for k in {1, 10, 19, 28, 37}, on the 2x8c4 NEON-dot IGEMM ukernel.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
5102 
// Single run of the 2x8c4 NEON-dot IGEMM ukernel at m = 2, n = 8, k = 8 with qmin set
// to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;  // presumably skips without NEON dot-product support (macro defined elsewhere)
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
5116 
// 2x8c4 NEON-dot kernel: m=2, n=8, k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5130 
// 2x8c4 NEON-dot kernel: m=2, n=8, k=8 with cm_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5144 
// 2x8c4 NEON-dot kernel: m=2, n=8, a_zero_point(0); k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5160 
// 2x8c4 NEON-dot kernel: m=2, n=8, b_zero_point(0); k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5176 
// 2x8c4 NEON-dot kernel: m=2, n=8, a_zero_point(0) and b_zero_point(0);
// k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5193 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
5194 
5195 
5196 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
// 3x8c4 NEON-dot kernel: m=3, n=8, k=8.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5209 
// 3x8c4 NEON-dot kernel: m=3, n=8, k=8 with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5223 
// 3x8c4 NEON-dot kernel: k=8, every (m, n) with m in 1..3 and n in 1..8,
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5241 
// 3x8c4 NEON-dot kernel: n=8, k=8, m swept over 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5257 
// 3x8c4 NEON-dot kernel: m=3, k=8, n swept over 1..8, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5273 
// 3x8c4 NEON-dot kernel: m=3, n=8, k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5288 
// 3x8c4 NEON-dot kernel: k in 1..7, n in 1..8, m in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5308 
// 3x8c4 NEON-dot kernel: m=3, n=8, k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5323 
// 3x8c4 NEON-dot kernel: k in 9..15, n in 1..8, m in 1..3, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5343 
// 3x8c4 NEON-dot kernel: m=3, n=8, k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5358 
// 3x8c4 NEON-dot kernel: k = 16, 24, ..., 80; n in 1..8; m in 1..3;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5378 
// 3x8c4 NEON-dot kernel: m=3, n in 9..15, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5395 
// 3x8c4 NEON-dot kernel: m=3, n in 9..15, k = 1, 10, ..., 37, cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5413 
// 3x8c4 NEON-dot kernel: n in 9..15, k = 1, 10, ..., 37, m in 1..3,
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5433 
// 3x8c4 NEON-dot kernel: m=3, n = 16 and 24, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5450 
// 3x8c4 NEON-dot kernel: m=3, n = 16 and 24, k = 1, 10, ..., 37,
// cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5468 
// 3x8c4 NEON-dot kernel: n = 16 and 24, k = 1, 10, ..., 37, m in 1..3,
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5488 
// 3x8c4 NEON-dot kernel: m=3, n=8, ks(3); k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5504 
// 3x8c4 NEON-dot kernel: ks(3); k = 1, 10, ..., 37; n in 1..8; m in 1..3;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5525 
// 3x8c4 NEON-dot kernel: m=3, ks(3); n in 9..15; k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5543 
// 3x8c4 NEON-dot kernel: m=3, ks(3); n = 16 and 24; k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5561 
// 3x8c4 NEON-dot kernel: cm_stride(11); k = 1, 10, ..., 37; n in 1..8;
// m in 1..3; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5582 
// 3x8c4 NEON-dot kernel: m=3, n=8, ks(3), a_offset(127); k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5599 
// 3x8c4 NEON-dot kernel: m=3, n=8, ks(3), a_offset(127); k = 1, 10, ..., 37
// and zero_index swept over 0..2.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5619 
// 3x8c4 NEON-dot kernel: m=3, n=8, k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5633 
// 3x8c4 NEON-dot kernel: m=3, n=8, k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5647 
// 3x8c4 NEON-dot kernel: m=3, n=8, k=8 with cm_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5661 
// 3x8c4 NEON-dot kernel: m=3, n=8, a_zero_point(0); k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5677 
// 3x8c4 NEON-dot kernel: m=3, n=8, b_zero_point(0); k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5693 
// 3x8c4 NEON-dot kernel: m=3, n=8, a_zero_point(0) and b_zero_point(0);
// k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5710 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
5711 
5712 
5713 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
// 4x8c4 NEON-dot kernel: m=4, n=8, k=8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5726 
// 4x8c4 NEON-dot kernel: m=4, n=8, k=8 with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
5740 
// 4x8c4 NEON-dot kernel: k=8, every (m, n) with m in 1..4 and n in 1..8,
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5758 
// 4x8c4 NEON-dot kernel: n=8, k=8, m swept over 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5774 
// 4x8c4 NEON-dot kernel: m=4, k=8, n swept over 1..8, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5790 
// 4x8c4 NEON-dot kernel: m=4, n=8, k swept over 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5805 
// 4x8c4 NEON-dot kernel: k in 1..7, n in 1..8, m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5825 
// 4x8c4 NEON-dot kernel: m=4, n=8, k swept over 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5840 
// 4x8c4 NEON-dot kernel: k in 9..15, n in 1..8, m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5860 
// 4x8c4 NEON-dot kernel: m=4, n=8, k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
5875 
// 4x8c4 NEON-dot kernel: k = 16, 24, ..., 80; n in 1..8; m in 1..4;
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5895 
// 4x8c4 NEON-dot kernel: m=4, n in 9..15, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5912 
// 4x8c4 NEON-dot kernel: m=4, n in 9..15, k = 1, 10, ..., 37, cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5930 
// 4x8c4 NEON-dot kernel: n in 9..15, k = 1, 10, ..., 37, m in 1..4,
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
5950 
// 4x8c4 NEON-dot kernel: m=4, n = 16 and 24, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5967 
// 4x8c4 NEON-dot kernel: m=4, n = 16 and 24, k = 1, 10, ..., 37,
// cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
5985 
// 4x8c4 NEON-dot kernel: n = 16 and 24, k = 1, 10, ..., 37, m in 1..4,
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6005 
// small_kernel: m = 4, n = 8 with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// small_kernel_subtile: ks(3) for every m in 1..4 and n in 1..8, with k in
// {1, 10, 19, 28, 37}; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n_gt_8_small_kernel: ks(3) with m = 4, n in 9..15 (above nr = 8) and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n_div_8_small_kernel: ks(3) with m = 4, n in {16, 24} (multiples of
// nr = 8) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// strided_cm_subtile: cm_stride(11) for every m in 1..4 and n in 1..8, with
// k in {1, 10, 19, 28, 37}; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// a_offset: m = 4, n = 8 with ks(3) and a_offset(163), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// zero: same setup as a_offset (ks(3), a_offset(163)), additionally sweeping
// zero_index(mz) over mz in 0..3 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// qmin: m = 4, n = 8, k = 8 with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// qmax: m = 4, n = 8, k = 8 with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// strided_cm: m = 4, n = 8, k = 8 with cm_stride(11) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// no_a_zero_point: m = 4, n = 8 with a_zero_point(0), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// no_b_zero_point: m = 4, n = 8 with b_zero_point(0), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// no_zero_point: m = 4, n = 8 with both a_zero_point(0) and b_zero_point(0),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
#endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64


#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
// k_eq_8: single run with m = mr = 5, n = nr = 8 and k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(5)
    .n(8)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// strided_cn: m = 5, n = 8, k = 8 with cn_stride(11) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(5)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// k_eq_8_subtile: k = 8 for every m in 1..5 and n in 1..8; each
// configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 5; m++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// k_eq_8_subtile_m: k = 8 and n = 8 for every m in 1..5; iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 5; m++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k_eq_8_subtile_n: k = 8 and m = 5 for every n in 1..8; iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(5)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k_lt_8: m = 5, n = 8 for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k_lt_8_subtile: every k in 1..7 combined with every m in 1..5 and
// n in 1..8; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k_gt_8: m = 5, n = 8 for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k_gt_8_subtile: every k in 9..15 combined with every m in 1..5 and
// n in 1..8; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k_div_8: m = 5, n = 8 for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k_div_8_subtile: k = 16, 24, ..., 80 combined with every m in 1..5 and
// n in 1..8; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n_gt_8: m = 5 with n in 9..15 (above nr = 8), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n_gt_8_strided_cn: same sweep as n_gt_8 (n in 9..15, k in
// {1, 10, 19, 28, 37}, m = 5) but with cn_stride(11) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n_gt_8_subtile: n in 9..15 and k in {1, 10, 19, 28, 37}, repeated for
// every m in 1..5; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n_div_8: m = mr = 5 with n in {16, 24} (multiples of nr = 8),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n_div_8_strided_cn: same sweep as n_div_8 (n in {16, 24}, k in
// {1, 10, 19, 28, 37}, m = 5) but with cn_stride(11) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n_div_8_subtile: n in {16, 24} and k in {1, 10, 19, 28, 37}, repeated for
// every m in 1..5; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// small_kernel: m = 5, n = 8 with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// small_kernel_subtile: ks(3) for every m in 1..5 and n in 1..8, with k in
// {1, 10, 19, 28, 37}; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n_gt_8_small_kernel: ks(3) with m = 5, n in 9..15 (above nr = 8) and
// k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n_div_8_small_kernel: ks(3) with m = 5, n in {16, 24} (multiples of
// nr = 8) and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// strided_cm_subtile: cm_stride(11) for every m in 1..5 and n in 1..8, with
// k in {1, 10, 19, 28, 37}; each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// a_offset: m = 5, n = 8 with ks(3) and a_offset(211), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(211)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// zero: same setup as a_offset (ks(3), a_offset(211)), additionally sweeping
// zero_index(mz) over mz in 0..4 for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 5; mz++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(211)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// qmin: m = 5, n = 8, k = 8 with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(5)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// qmax: m = 5, n = 8, k = 8 with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(5)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// strided_cm: m = 5, n = 8, k = 8 with cm_stride(11) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(5)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// no_a_zero_point: m = 5, n = 8 with a_zero_point(0), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// no_b_zero_point: m = 5, n = 8 with b_zero_point(0), for k in
// {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// no_zero_point: m = 5, n = 8 with both a_zero_point(0) and b_zero_point(0),
// for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
#endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64


#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
// k_eq_8: single run with m = mr = 1, n = nr = 16 and k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// strided_cn: m = 1, n = 16, k = 8 with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// k_eq_8_subtile: k = 8 for every n in 1..16; the m loop covers only m = 1
// because mr = 1. Each configuration runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// k_eq_8_subtile_m: k = 8 and n = 16; the m loop covers only m = 1 because
// mr = 1. Runs with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k_eq_8_subtile_n: k = 8 and m = 1 for every n in 1..16; iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k_lt_8: m = 1, n = 16 for every k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k_lt_8_subtile: every k in 1..7 combined with every n in 1..16; the m loop
// covers only m = 1 because mr = 1. Each configuration runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k_gt_8: m = 1, n = 16 for every k in 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k_gt_8_subtile: every k in 9..15 combined with every n in 1..16; the m
// loop covers only m = 1 because mr = 1. Each configuration runs with
// iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Runs the m = 1, n = 16 configuration for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
6909 
// Crosses k = 16, 24, ..., 80 with n in [1, 16] and m in [1, 1]; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6929 
// Crosses n in [17, 31] with k = 1, 10, 19, 28, 37 at m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6946 
// Crosses n in [17, 31] with k = 1, 10, 19, 28, 37 at m = 1, with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
6964 
// Crosses n in [17, 31], k = 1, 10, 19, 28, 37 and m in [1, 1]; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
6984 
// Crosses n = 32, 48 with k = 1, 10, 19, 28, 37 at m = 1.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7001 
// Crosses n = 32, 48 with k = 1, 10, 19, 28, 37 at m = 1, with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7019 
// Crosses n = 32, 48, k = 1, 10, 19, 28, 37 and m in [1, 1]; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7039 
// Runs m = 1, n = 16 with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7055 
// Crosses k = 1, 10, 19, 28, 37 with n in [1, 16] and m in [1, 1], using ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7076 
// Crosses n in [17, 31] with k = 1, 10, 19, 28, 37 at m = 1, with ks(3) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7094 
// Crosses n = 32, 48 with k = 1, 10, 19, 28, 37 at m = 1, with ks(3) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7112 
// Crosses k = 1, 10, 19, 28, 37 with n in [1, 16] and m in [1, 1], using cm_stride(19) and iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7133 
// Runs m = 1, n = 16 with ks(3) and a_offset(43) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7150 
// For k = 1, 10, 19, 28, 37, passes each index in [0, 0] to zero_index() with ks(3) and a_offset(43).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7170 
// Single m = 1, n = 16, k = 8 run with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7184 
// Single m = 1, n = 16, k = 8 run with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7198 
// Single m = 1, n = 16, k = 8 run with cm_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7212 
// Runs m = 1, n = 16 with a_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7228 
// Runs m = 1, n = 16 with b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7244 
// Runs m = 1, n = 16 with both a_zero_point(0) and b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7261 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
7262 
7263 
7264 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
// Single m = 6, n = 16, k = 8 run of the 6x16c4 kernel.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7277 
// Single m = 6, n = 16, k = 8 run with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7291 
// Crosses n in [1, 16] with m in [1, 6] at k = 8; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7309 
// Varies m over [1, 6] with n = 16 and k = 8; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7325 
// Varies n over [1, 16] with m = 6 and k = 8; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7341 
// Runs the m = 6, n = 16 configuration for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7356 
// Crosses k in [1, 7] with n in [1, 16] and m in [1, 6]; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7376 
// Runs the m = 6, n = 16 configuration for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7391 
// Crosses k in [9, 15] with n in [1, 16] and m in [1, 6]; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7411 
// Runs the m = 6, n = 16 configuration for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(kc)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7426 
// Crosses k = 16, 24, ..., 80 with n in [1, 16] and m in [1, 6]; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7446 
// Crosses n in [17, 31] with k = 1, 10, 19, 28, 37 at m = 6.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7463 
// Crosses n in [17, 31] with k = 1, 10, 19, 28, 37 at m = 6, with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7481 
// Crosses n in [17, 31], k = 1, 10, 19, 28, 37 and m in [1, 6]; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7501 
// Crosses n = 32, 48 with k = 1, 10, 19, 28, 37 at m = 6.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(nc).k(kc)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7518 
// Crosses n = 32, 48 with k = 1, 10, 19, 28, 37 at m = 6, with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7536 
// Crosses n = 32, 48, k = 1, 10, 19, 28, 37 and m in [1, 6]; every configuration uses iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7556 
// Runs m = 6, n = 16 with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(kc)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7572 
// Crosses k = 1, 10, 19, 28, 37 with n in [1, 16] and m in [1, 6], using ks(3) and iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7593 
// Crosses n in [17, 31] with k = 1, 10, 19, 28, 37 at m = 6, with ks(3) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7611 
// Crosses n = 32, 48 with k = 1, 10, 19, 28, 37 at m = 6, with ks(3) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7629 
// Crosses k = 1, 10, 19, 28, 37 with n in [1, 16] and m in [1, 6], using cm_stride(19) and iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7650 
// Runs m = 6, n = 16 with ks(3) and a_offset(251) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(kc)
      .ks(3)
      .a_offset(251)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7667 
// For k = 1, 10, 19, 28, 37, passes each index in [0, 5] to zero_index() with ks(3) and a_offset(251).
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(4).sr(1)
        .m(6).n(16).k(kc)
        .ks(3)
        .a_offset(251)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
7687 
// Single m = 6, n = 16, k = 8 run with qmin(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7701 
// Single m = 6, n = 16, k = 8 run with qmax(128) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7715 
// Single m = 6, n = 16, k = 8 run with cm_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(4).sr(1)
    .m(6).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
7729 
// Runs m = 6, n = 16 with a_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7745 
// Runs m = 6, n = 16 with b_zero_point(0) for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(4).sr(1)
      .m(6).n(16).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
7761 
// Full 6x16 tile with both a_zero_point and b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki <= 40; ki += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(6).n(16).k(ki);          // problem size
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
7778 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
7779 
7780 
7781 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
// Full 1x32 tile (m == mr, n == nr) at k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
  tester.m(1).n(32).k(8);           // problem size
  tester.Test(
    xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
7794 
// Full 1x32 tile at k = 8 with cn_stride set to 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
  tester.m(1).n(32).k(8);           // problem size
  tester.cn_stride(37);
  tester.Test(
    xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
7808 
// k = 8 for every (m, n) with n in [1, 32] and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t ni = 1; ni <= 32; ni++) {
    for (uint32_t mi = 1; mi <= 1; mi++) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
      tester.m(mi).n(ni).k(8);          // problem size
      tester.iterations(1);
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
7826 
// k = 8, n = 32, with m swept over [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mi = 1; mi <= 1; mi++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(mi).n(32).k(8);          // problem size
    tester.iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
7842 
// k = 8, m = 1, with n swept over [1, 32]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t ni = 1; ni <= 32; ni++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(1).n(ni).k(8);           // problem size
    tester.iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
7858 
// Full 1x32 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki < 8; ki++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(1).n(32).k(ki);          // problem size
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
7873 
// Every k in [1, 7] crossed with n in [1, 32] and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki < 8; ki++) {
    for (uint32_t ni = 1; ni <= 32; ni++) {
      for (uint32_t mi = 1; mi <= 1; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7893 
// Full 1x32 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 9; ki < 16; ki++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(1).n(32).k(ki);          // problem size
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
7908 
// Every k in [9, 15] crossed with n in [1, 32] and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 9; ki < 16; ki++) {
    for (uint32_t ni = 1; ni <= 32; ni++) {
      for (uint32_t mi = 1; mi <= 1; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7928 
// Full 1x32 tile for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 16; ki <= 80; ki += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(1).n(32).k(ki);          // problem size
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
7943 
// k = 16, 24, ..., 80 crossed with n in [1, 32] and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 16; ki <= 80; ki += 8) {
    for (uint32_t ni = 1; ni <= 32; ni++) {
      for (uint32_t mi = 1; mi <= 1; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
7963 
// m = 1 with n in [33, 63], each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t ni = 33; ni < 64; ni++) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
      tester.m(1).n(ni).k(ki);          // problem size
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
7980 
// m = 1 with n in [33, 63] and cn_stride set to 37, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t ni = 33; ni < 64; ni++) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
      tester.m(1).n(ni).k(ki);          // problem size
      tester.cn_stride(37);
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
7998 
// n in [33, 63] crossed with k = 1, 10, 19, 28, 37 and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t ni = 33; ni < 64; ni++) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      for (uint32_t mi = 1; mi <= 1; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8018 
// m = 1 with n = 64 and 96, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t ni = 64; ni <= 96; ni += 32) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
      tester.m(1).n(ni).k(ki);          // problem size
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
8035 
// m = 1 with n = 64 and 96 and cn_stride set to 37, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t ni = 64; ni <= 96; ni += 32) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
      tester.m(1).n(ni).k(ki);          // problem size
      tester.cn_stride(37);
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
8053 
// n = 64 and 96 crossed with k = 1, 10, 19, 28, 37 and m in [1, 1]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t ni = 64; ni <= 96; ni += 32) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      for (uint32_t mi = 1; mi <= 1; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8073 
// Full 1x32 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki <= 40; ki += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(1).n(32).k(ki);          // problem size
    tester.ks(3);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8089 
// ks = 3 with k = 1, 10, 19, 28, 37 crossed with n in [1, 32] and m in [1, 1];
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki <= 40; ki += 9) {
    for (uint32_t ni = 1; ni <= 32; ni++) {
      for (uint32_t mi = 1; mi <= 1; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8110 
// ks = 3 and m = 1 with n in [33, 63], each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t ni = 33; ni < 64; ni++) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
      tester.m(1).n(ni).k(ki);          // problem size
      tester.ks(3);
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
8128 
// ks = 3 and m = 1 with n = 64 and 96, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t ni = 64; ni <= 96; ni += 32) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
      tester.m(1).n(ni).k(ki);          // problem size
      tester.ks(3);
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
8146 
// cm_stride = 37 with k = 1, 10, 19, 28, 37 crossed with n in [1, 32] and m in [1, 1];
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki <= 40; ki += 9) {
    for (uint32_t ni = 1; ni <= 32; ni++) {
      for (uint32_t mi = 1; mi <= 1; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.cm_stride(37).iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8167 
// Full 1x32 tile with ks = 3 and a_offset = 43, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki <= 40; ki += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(1).n(32).k(ki);          // problem size
    tester.ks(3).a_offset(43);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8184 
// Full 1x32 tile with ks = 3 and a_offset = 43; for k = 1, 10, 19, 28, 37 the
// zero_index is swept over [0, 0].
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki <= 40; ki += 9) {
    for (uint32_t zi = 0; zi < 1; zi++) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
      tester.m(1).n(32).k(ki);          // problem size
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
8204 
// Full 1x32 tile at k = 8 with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
  tester.m(1).n(32).k(8);           // problem size
  tester.qmin(128);
  tester.Test(
    xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
8218 
// Full 1x32 tile at k = 8 with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
  tester.m(1).n(32).k(8);           // problem size
  tester.qmax(128);
  tester.Test(
    xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
8232 
// Full 1x32 tile at k = 8 with cm_stride set to 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
  tester.m(1).n(32).k(8);           // problem size
  tester.cm_stride(37);
  tester.Test(
    xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
8246 
// Full 1x32 tile with a_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki <= 40; ki += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(1).n(32).k(ki);          // problem size
    tester.a_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8262 
// Full 1x32 tile with b_zero_point set to 0, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki <= 40; ki += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(1).n(32).k(ki);          // problem size
    tester.b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8278 
// Full 1x32 tile with both a_zero_point and b_zero_point set to 0,
// for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, no_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t ki = 1; ki <= 40; ki += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(32).kr(4).sr(1);  // micro-kernel tile parameters
    tester.m(1).n(32).k(ki);          // problem size
    tester.a_zero_point(0).b_zero_point(0);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8295 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
8296 
8297 
8298 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 4x16 tile (m == mr, n == nr) at k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
  tester.m(4).n(16).k(8);           // problem size
  tester.Test(
    xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
8311 
// Full 4x16 tile at k = 8 with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
  tester.m(4).n(16).k(8);           // problem size
  tester.cn_stride(19);
  tester.Test(
    xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
    xnn_init_qu8_conv_minmax_rndnu_neon_params,
    xnn_qu8_requantize_rndnu);
}
8325 
// k = 8 for every (m, n) with n in [1, 16] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ni = 1; ni <= 16; ni++) {
    for (uint32_t mi = 1; mi <= 4; mi++) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
      tester.m(mi).n(ni).k(8);          // problem size
      tester.iterations(1);
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
8343 
// k = 8, n = 16, with m swept over [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mi = 1; mi <= 4; mi++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
    tester.m(mi).n(16).k(8);          // problem size
    tester.iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8359 
// k = 8, m = 4, with n swept over [1, 16]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ni = 1; ni <= 16; ni++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
    tester.m(4).n(ni).k(8);           // problem size
    tester.iterations(1);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8375 
// Full 4x16 tile for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ki = 1; ki < 8; ki++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
    tester.m(4).n(16).k(ki);          // problem size
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8390 
// Every k in [1, 7] crossed with n in [1, 16] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ki = 1; ki < 8; ki++) {
    for (uint32_t ni = 1; ni <= 16; ni++) {
      for (uint32_t mi = 1; mi <= 4; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8410 
// Full 4x16 tile for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ki = 9; ki < 16; ki++) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
    tester.m(4).n(16).k(ki);          // problem size
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8425 
// Every k in [9, 15] crossed with n in [1, 16] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ki = 9; ki < 16; ki++) {
    for (uint32_t ni = 1; ni <= 16; ni++) {
      for (uint32_t mi = 1; mi <= 4; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8445 
// Full 4x16 tile for k = 16, 24, ..., 80 (step 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ki = 16; ki <= 80; ki += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
    tester.m(4).n(16).k(ki);          // problem size
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8460 
// k = 16, 24, ..., 80 crossed with n in [1, 16] and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ki = 16; ki <= 80; ki += 8) {
    for (uint32_t ni = 1; ni <= 16; ni++) {
      for (uint32_t mi = 1; mi <= 4; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8480 
// m = 4 with n in [17, 31], each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ni = 17; ni < 32; ni++) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
      tester.m(4).n(ni).k(ki);          // problem size
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
8497 
// m = 4 with n in [17, 31] and cn_stride set to 19, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ni = 17; ni < 32; ni++) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
      tester.m(4).n(ni).k(ki);          // problem size
      tester.cn_stride(19);
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
8515 
// n in [17, 31] crossed with k = 1, 10, 19, 28, 37 and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ni = 17; ni < 32; ni++) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      for (uint32_t mi = 1; mi <= 4; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8535 
// m = 4 with n = 32 and 48, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ni = 32; ni <= 48; ni += 16) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
      tester.m(4).n(ni).k(ki);          // problem size
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
8552 
// m = 4 with n = 32 and 48 and cn_stride set to 19, each for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ni = 32; ni <= 48; ni += 16) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
      tester.m(4).n(ni).k(ki);          // problem size
      tester.cn_stride(19);
      tester.Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
    }
  }
}
8570 
// n = 32 and 48 crossed with k = 1, 10, 19, 28, 37 and m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t ni = 32; ni <= 48; ni += 16) {
    for (size_t ki = 1; ki <= 40; ki += 9) {
      for (uint32_t mi = 1; mi <= 4; mi++) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
        tester.m(mi).n(ni).k(ki);         // problem size
        tester.iterations(1);
        tester.Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8590 
// Full 4x16 tile with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t ki = 1; ki <= 40; ki += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1);  // micro-kernel tile parameters
    tester.m(4).n(16).k(ki);          // problem size
    tester.ks(3);
    tester.Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
  }
}
8606 
// mr=4 nr=16 kr=1 sr=1; k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..4, ks=3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8627 
// mr=4 nr=16 kr=1 sr=1; m=4, n in 17..31, k in {1, 10, 19, 28, 37}, ks=3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
8645 
// mr=4 nr=16 kr=1 sr=1; m=4, n in {32, 48}, k in {1, 10, 19, 28, 37}, ks=3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
8663 
// mr=4 nr=16 kr=1 sr=1; k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..4,
// cm_stride=19, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8684 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37}, ks=3, a_offset=163.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8701 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37}, ks=3, a_offset=163,
// zero_index swept over 0..3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
8721 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k=8, qmin=128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
8735 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k=8, qmax=128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
8749 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k=8, cm_stride=19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
8763 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37}, a_zero_point=0.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8779 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37}, b_zero_point=0.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8795 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37},
// a_zero_point=0 and b_zero_point=0.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8812 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
8813 
8814 
8815 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k=8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
8828 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k=8, cn_stride=19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
8842 
// mr=4 nr=16 kr=1 sr=1; k=8, n in 1..16, m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
8860 
// mr=4 nr=16 kr=1 sr=1; k=8, n=16, m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8876 
// mr=4 nr=16 kr=1 sr=1; k=8, m=4, n in 1..16, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8892 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8907 
// mr=4 nr=16 kr=1 sr=1; k in 1..7, n in 1..16, m in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8927 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8942 
// mr=4 nr=16 kr=1 sr=1; k in 9..15, n in 1..16, m in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8962 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k from 16 to 80 in steps of 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
8977 
// mr=4 nr=16 kr=1 sr=1; k from 16 to 80 in steps of 8, n in 1..16, m in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
8997 
// mr=4 nr=16 kr=1 sr=1; m=4, n in 17..31, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9014 
// mr=4 nr=16 kr=1 sr=1; m=4, n in 17..31, k in {1, 10, 19, 28, 37}, cn_stride=19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9032 
// mr=4 nr=16 kr=1 sr=1; n in 17..31, k in {1, 10, 19, 28, 37}, m in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9052 
// mr=4 nr=16 kr=1 sr=1; m=4, n in {32, 48}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9069 
// mr=4 nr=16 kr=1 sr=1; m=4, n in {32, 48}, k in {1, 10, 19, 28, 37}, cn_stride=19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9087 
// mr=4 nr=16 kr=1 sr=1; n in {32, 48}, k in {1, 10, 19, 28, 37}, m in 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9107 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37}, ks=3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9123 
// mr=4 nr=16 kr=1 sr=1; k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..4, ks=3,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9144 
// mr=4 nr=16 kr=1 sr=1; m=4, n in 17..31, k in {1, 10, 19, 28, 37}, ks=3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9162 
// mr=4 nr=16 kr=1 sr=1; m=4, n in {32, 48}, k in {1, 10, 19, 28, 37}, ks=3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9180 
// mr=4 nr=16 kr=1 sr=1; k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..4,
// cm_stride=19, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9201 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37}, ks=3, a_offset=163.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9218 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37}, ks=3, a_offset=163,
// zero_index swept over 0..3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9238 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k=8, qmin=128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9252 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k=8, qmax=128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9266 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k=8, cm_stride=19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9280 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37}, a_zero_point=0.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9296 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37}, b_zero_point=0.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9312 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k in {1, 10, 19, 28, 37},
// a_zero_point=0 and b_zero_point=0.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9329 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9330 
9331 
9332 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k=8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9345 
// mr=4 nr=16 kr=1 sr=1; m=4, n=16, k=8, cn_stride=19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9359 
// mr=4 nr=16 kr=1 sr=1; k=8, n in 1..16, m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9377 
// mr=4 nr=16 kr=1 sr=1; k=8, n=16, m in 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9393 
// Runs m(4), k(8) for every n in [1, 16], using iterations(1) each time.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9409 
// Runs m(4), n(16) once for each k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9424 
// For each k in [1, 7], runs every (n, m) pair with n in [1, 16] and
// m in [1, 4], using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9444 
// Runs m(4), n(16) once for each k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9459 
// For each k in [9, 15], runs every (n, m) pair with n in [1, 16] and
// m in [1, 4], using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9479 
// Runs m(4), n(16) for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9494 
// For k = 16, 24, ..., 80, runs every (n, m) pair with n in [1, 16] and
// m in [1, 4], using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9514 
// For each n in [17, 31], runs m(4) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9531 
// For each n in [17, 31], runs m(4) with k = 1, 10, 19, 28, 37 and
// cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9549 
// For each n in [17, 31] and k = 1, 10, 19, 28, 37, runs every m in [1, 4]
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9569 
// For n = 32 and n = 48, runs m(4) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9586 
// For n = 32 and n = 48, runs m(4) with k = 1, 10, 19, 28, 37 and
// cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9604 
// For n = 32 and n = 48 and k = 1, 10, 19, 28, 37, runs every m in [1, 4]
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9624 
// Runs m(4), n(16), ks(3) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9640 
// For k = 1, 10, 19, 28, 37, runs every (n, m) pair with n in [1, 16] and
// m in [1, 4], with ks(3) and iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9661 
// For each n in [17, 31], runs m(4), ks(3) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9679 
// For n = 32 and n = 48, runs m(4), ks(3) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9697 
// For k = 1, 10, 19, 28, 37, runs every (n, m) pair with n in [1, 16] and
// m in [1, 4], with cm_stride(19) and iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9718 
// Runs m(4), n(16), ks(3), a_offset(163) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9735 
// For k = 1, 10, 19, 28, 37, runs m(4), n(16), ks(3), a_offset(163) once
// per zero_index value in [0, 3].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9755 
// Single run with m(4), n(16), k(8) and qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9769 
// Single run with m(4), n(16), k(8) and qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9783 
// Single run with m(4), n(16), k(8) and cm_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9797 
// Runs m(4), n(16), a_zero_point(0) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9813 
// Runs m(4), n(16), b_zero_point(0) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9829 
// Runs m(4), n(16) with both a_zero_point(0) and b_zero_point(0) for
// k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9846 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9847 
9848 
9849 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m(4), n(16), k(8).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9862 
// Single run with m(4), n(16), k(8) and cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
9876 
// Runs k(8) for every (n, m) pair with n in [1, 16] and m in [1, 4],
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
9894 
// Runs n(16), k(8) for every m in [1, 4], using iterations(1) each time.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9910 
// Runs m(4), k(8) for every n in [1, 16], using iterations(1) each time.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9926 
// Runs m(4), n(16) once for each k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9941 
// For each k in [1, 7], runs every (n, m) pair with n in [1, 16] and
// m in [1, 4], using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9961 
// Runs m(4), n(16) once for each k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
9976 
// For each k in [9, 15], runs every (n, m) pair with n in [1, 16] and
// m in [1, 4], using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
9996 
// Runs m(4), n(16) for k = 16, 24, ..., 80 (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
10011 
// For k = 16, 24, ..., 80, runs every (n, m) pair with n in [1, 16] and
// m in [1, 4], using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
10031 
// For each n in [17, 31], runs m(4) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10048 
// For each n in [17, 31], runs m(4) with k = 1, 10, 19, 28, 37 and
// cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10066 
// For each n in [17, 31] and k = 1, 10, 19, 28, 37, runs every m in [1, 4]
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
10086 
// For n = 32 and n = 48, runs m(4) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10103 
// For n = 32 and n = 48, runs m(4) with k = 1, 10, 19, 28, 37 and
// cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10121 
// For n = 32 and n = 48 and k = 1, 10, 19, 28, 37, runs every m in [1, 4]
// using iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
10141 
// Runs m(4), n(16), ks(3) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_size)
      .ks(3)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
10157 
// For k = 1, 10, 19, 28, 37, runs every (n, m) pair with n in [1, 16] and
// m in [1, 4], with ks(3) and iterations(1) for each configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
10178 
// For each n in [17, 31], runs m(4), ks(3) with k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10196 
// n_div_16_small_kernel: with ks(3) and m fixed at 4, covers n = 32 and
// n = 48 (multiples of nr = 16), each for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10214 
// strided_cm_subtile: with cm_stride(19), exercises every output sub-tile
// m in [1, 4] x n in [1, 16] for each k in {1, 10, 19, 28, 37}.
// Each configuration is run with iterations(1); ks is left at the tester's
// default here.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
      }
    }
  }
}
10235 
// a_offset: runs the full 4x16 tile with ks(3) and a_offset(163), once for
// each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
10252 
// zero: runs the full 4x16 tile with ks(3) and a_offset(163), passing each
// zero_index in [0, 3] to the tester, for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
    }
  }
}
10272 
// qmin: single run of the full 4x16 tile at k = 8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
10286 
// qmax: single run of the full 4x16 tile at k = 8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
10300 
// strided_cm: single run of the full 4x16 tile at k = 8 with cm_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qu8_conv_minmax_rndnu_neon_params,
      xnn_qu8_requantize_rndnu);
}
10314 
// no_a_zero_point: runs the full 4x16 tile with a_zero_point(0), once for
// each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .a_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
10330 
// no_b_zero_point: runs the full 4x16 tile with b_zero_point(0), once for
// each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
10346 
// no_zero_point: runs the full 4x16 tile with both a_zero_point(0) and
// b_zero_point(0), once for each k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_zero_point) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(kc)
      .a_zero_point(0)
      .b_zero_point(0)
      .Test(
        xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qu8_conv_minmax_rndnu_neon_params,
        xnn_qu8_requantize_rndnu);
  }
}
10363 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
10364