• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qs8-igemm-minmax-rndnu.yaml
//   Generator: tools/generate-gemm-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 
20 #include <xnnpack/gemm.h>
21 #include <xnnpack/igemm.h>
22 #include <xnnpack/ppmm.h>
23 #include "gemm-microkernel-tester.h"
24 
25 
26 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
// Runs the 4x8c4 AArch32 NEON-dot (Cortex-A55) IGEMM micro-kernel once on the
// full tile: m = 4, n = 8, k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8) {
  // ISA guard macro from isa-checks.h; presumably skips the test when the CPU
  // lacks NEON dot-product support -- confirm against the macro definition.
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
39 
// Full tile (m = 4, n = 8, k = 8) with cn_stride set to 11, a value that
// differs from nr = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
53 
// Sweeps every sub-tile shape at k = 8: n in [1, 8] (outer loop) and
// m in [1, 4] (inner loop), each configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
71 
// Sweeps m in [1, 4] while n = 8 and k = 8 stay fixed; iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
87 
// Sweeps n in [1, 8] while m = 4 and k = 8 stay fixed; iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
103 
// Full tile (m = 4, n = 8) for every k in [1, 7], i.e. all k below 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
118 
// For every k in [1, 7], sweeps all sub-tiles: n in [1, 8] and m in [1, 4],
// each configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
138 
// Full tile (m = 4, n = 8) for every k in [9, 15], i.e. k strictly between
// 8 and 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
153 
// For every k in [9, 15], sweeps all sub-tiles: n in [1, 8] and m in [1, 4],
// each configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
173 
// Full tile (m = 4, n = 8) for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
188 
// For k = 16, 24, ..., 80, sweeps all sub-tiles: n in [1, 8] and m in [1, 4],
// each configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
208 
// Output widths above one tile: n in [9, 15] with m = 4, for
// k = 1, 10, 19, 28, 37 (step 9 up to 40).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
225 
// Same sweep as n in [9, 15] x k = 1, 10, ..., 37 with m = 4, but with
// cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
243 
// n in [9, 15] x k = 1, 10, ..., 37, additionally sweeping m in [1, 4];
// each shape configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
263 
// Output widths that are multiples of 8: n = 16 and 24 with m = 4, for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
280 
// n = 16 and 24 with m = 4 and k = 1, 10, ..., 37, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
298 
// n = 16 and 24 x k = 1, 10, ..., 37, additionally sweeping m in [1, 4];
// each shape configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
318 
// Full tile (m = 4, n = 8) with ks(3) for k = 1, 10, 19, 28, 37.
// NOTE(review): ks is presumably the IGEMM kernel-size (indirection count)
// -- confirm in gemm-microkernel-tester.h.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
334 
// ks(3) across all sub-tiles: k = 1, 10, ..., 37, n in [1, 8], m in [1, 4];
// each shape configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
355 
// ks(3) with n in [9, 15], m = 4, and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
373 
// ks(3) with n = 16 and 24, m = 4, and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
391 
// cm_stride(11) across all sub-tiles: k = 1, 10, ..., 37, n in [1, 8],
// m in [1, 4]; each shape configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
412 
// Full tile (m = 4, n = 8) with ks(3) and a_offset(163), for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
429 
// Full tile with ks(3) and a_offset(163), repeating each k = 1, 10, ..., 37
// with zero_index(mz) for every mz in [0, 3].
// NOTE(review): zero_index presumably selects which row uses the zero buffer
// -- confirm in gemm-microkernel-tester.h.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
449 
// Full tile (m = 4, n = 8, k = 8) with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
463 
// Full tile (m = 4, n = 8, k = 8) with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
477 
// Full tile (m = 4, n = 8, k = 8) with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
491 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
492 
493 
494 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the 4x8 AArch64 NEON MLAL-lane (prfm, ld64) IGEMM micro-kernel once on
// the full tile: m = 4, n = 8, k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
  // ISA guard macro from isa-checks.h; presumably skips the test when NEON is
  // unavailable -- confirm against the macro definition.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
507 
// Full tile (m = 4, n = 8, k = 8) with cn_stride set to 11, a value that
// differs from nr = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
521 
// Sweeps every sub-tile shape at k = 8: n in [1, 8] (outer loop) and
// m in [1, 4] (inner loop), each configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
539 
// Sweeps m in [1, 4] while n = 8 and k = 8 stay fixed; iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
555 
// Sweeps n in [1, 8] while m = 4 and k = 8 stay fixed; iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
571 
// Full tile (m = 4, n = 8) for every k in [1, 7], i.e. all k below 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
586 
// For every k in [1, 7], sweeps all sub-tiles: n in [1, 8] and m in [1, 4],
// each configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
606 
// Full tile (m = 4, n = 8) for every k in [9, 15], i.e. k strictly between
// 8 and 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
621 
// For every k in [9, 15], sweeps all sub-tiles: n in [1, 8] and m in [1, 4],
// each configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
641 
// Full tile (m = 4, n = 8) for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
656 
// For k = 16, 24, ..., 80, sweeps all sub-tiles: n in [1, 8] and m in [1, 4],
// each configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
676 
// Output widths above one tile: n in [9, 15] with m = 4, for
// k = 1, 10, 19, 28, 37 (step 9 up to 40).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
693 
// Same sweep as n in [9, 15] x k = 1, 10, ..., 37 with m = 4, but with
// cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
711 
// n in [9, 15] x k = 1, 10, ..., 37, additionally sweeping m in [1, 4];
// each shape configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
731 
// Output widths that are multiples of 8: n = 16 and 24 with m = 4, for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
748 
// n = 16 and 24 with m = 4 and k = 1, 10, ..., 37, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
766 
// n = 16 and 24 x k = 1, 10, ..., 37, additionally sweeping m in [1, 4];
// each shape configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
786 
// Full tile (m = 4, n = 8) with ks(3) for k = 1, 10, 19, 28, 37.
// NOTE(review): ks is presumably the IGEMM kernel-size (indirection count)
// -- confirm in gemm-microkernel-tester.h.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
802 
// ks(3) across all sub-tiles: k = 1, 10, ..., 37, n in [1, 8], m in [1, 4];
// each shape configured with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
823 
// ks(3) with n in [9, 15], m = 4, and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
841 
// ks(3) with n = 16 and 24 (multiples of nr = 8), m = 4,
// for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_val).k(k_val).ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
859 
// cm_stride(11) with every m in [1, 4] and n in [1, 8],
// for k = 1, 10, 19, 28, 37; iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
880 
// Full 4x8 tile with ks(3) and a_offset(163), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_val).ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
897 
// Full 4x8 tile with ks(3) and a_offset(163), trying zero_index 0..3
// for each k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_val).ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
917 
// Full 4x8 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
931 
// Full 4x8 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
945 
// Full 4x8 tile at k = 8 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
959 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
960 
961 
962 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile (m == mr, n == nr) at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
975 
// Full 2x8 tile at k = 8 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
989 
// k = 8 with every m in [1, 2] and n in [1, 8]; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1007 
// k = 8, n = 8, with m varied over [1, 2]; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(m_val).n(8).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1023 
// k = 8, m = 2, with n varied over [1, 8]; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1039 
// Full 2x8 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1054 
// Every k in [1, 7] combined with every m in [1, 2] and n in [1, 8];
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1074 
// Full 2x8 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1089 
// Every k in [9, 15] combined with every m in [1, 2] and n in [1, 8];
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1109 
// Full 2x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1124 
// k = 16, 24, ..., 80 combined with every m in [1, 2] and n in [1, 8];
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1144 
// n in [9, 15] (above nr = 8) with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1161 
// n in [9, 15] with m = 2 and cn_stride(11), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1179 
// n in [9, 15] with every m in [1, 2], for k = 1, 10, 19, 28, 37;
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1199 
// n = 16 and 24 (multiples of nr = 8) with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1216 
// n = 16 and 24 with m = 2 and cn_stride(11), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1234 
// n = 16 and 24 with every m in [1, 2], for k = 1, 10, 19, 28, 37;
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1254 
// Full 2x8 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_val).ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1270 
// ks(3) with every m in [1, 2] and n in [1, 8], for k = 1, 10, 19, 28, 37;
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val).ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1291 
// ks(3) with n in [9, 15] (above nr = 8), m = 2, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val).ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1309 
// ks(3) with n = 16 and 24 (multiples of nr = 8), m = 2,
// for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val).ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1327 
// cm_stride(11) with every m in [1, 2] and n in [1, 8],
// for k = 1, 10, 19, 28, 37; iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1348 
// Full 2x8 tile with ks(3) and a_offset(83), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_val).ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1365 
// Full 2x8 tile with ks(3) and a_offset(83), trying zero_index 0 and 1
// for each k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val).ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1385 
// Full 2x8 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
1399 
// Full 2x8 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
1413 
// Full 2x8 tile at k = 8 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
1427 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1428 
1429 
1430 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile (m == mr, n == nr) at k = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
1443 
// Full 2x8 tile at k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
1457 
// k = 16 with every m in [1, 2] and n in [1, 8]; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1475 
// k = 16, n = 8, with m varied over [1, 2]; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1491 
// k = 16, m = 2, with n varied over [1, 8]; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(n_val).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1507 
// Full 2x8 tile for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1522 
// Every k in [1, 15] combined with every m in [1, 2] and n in [1, 8];
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1542 
// Full 2x8 tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1557 
// Every k in [17, 31] combined with every m in [1, 2] and n in [1, 8];
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1577 
// Full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
1592 
// k = 32, 48, ..., 160 combined with every m in [1, 2] and n in [1, 8];
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1612 
// n in [9, 15] (above nr = 8) with m = 2, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1629 
// n in [9, 15] with m = 2 and cn_stride(11), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1647 
// n in [9, 15] with every m in [1, 2], for k = 1, 18, 35, 52, 69;
// iterations is set to 1 for each configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1667 
// n = 16 and 24 (multiples of nr = 8) with m = 2, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1684 
// n = 16 and 24 with m = 2 and cn_stride(11), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
1702 
// 2x8c2 NEON MLAL LD1R: n = 16 and 24, k = 1, 18, 35, 52, 69, m = 1..2; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1722 
// 2x8c2 NEON MLAL LD1R: full 2x8 tile with ks(3), k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1738 
// 2x8c2 NEON MLAL LD1R: ks(3) with k = 1, 18, 35, 52, 69, n = 1..8, m = 1..2; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1759 
// 2x8c2 NEON MLAL LD1R: ks(3) with n = 9..15, k = 1, 18, 35, 52, 69, m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
1777 
// 2x8c2 NEON MLAL LD1R: ks(3) with n = 16 and 24, k = 1, 18, 35, 52, 69, m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
1795 
// 2x8c2 NEON MLAL LD1R: cm_stride(11) with k = 1, 18, 35, 52, 69, n = 1..8, m = 1..2; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
1816 
// 2x8c2 NEON MLAL LD1R: full 2x8 tile with ks(3) and a_offset(163), k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k_val)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1833 
// 2x8c2 NEON MLAL LD1R: ks(3), a_offset(163) and zero_index = 0..1, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t mz_val = 0; mz_val < 2; ++mz_val) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .a_offset(163)
        .zero_index(mz_val)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
1853 
// 2x8c2 NEON MLAL LD1R: single 2x8 run at k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
1867 
// 2x8c2 NEON MLAL LD1R: single 2x8 run at k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
1881 
// 2x8c2 NEON MLAL LD1R: single 2x8 run at k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
1895 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1896 
1897 
1898 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8c2 NEON MULL LD2R: single 1x8 run at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
1911 
// 1x8c2 NEON MULL LD2R: single 1x8 run at k = 8 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
1925 
// 1x8c2 NEON MULL LD2R: k = 8 with n = 1..8 and m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
1943 
// 1x8c2 NEON MULL LD2R: k = 8, n = 8, m = 1 (single-value loop); iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_val).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1959 
// 1x8c2 NEON MULL LD2R: k = 8, m = 1, n = 1..8; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1975 
// 1x8c2 NEON MULL LD2R: full 1x8 tile for k = 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
1990 
// 1x8c2 NEON MULL LD2R: k = 1..7, n = 1..8, m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2010 
// 1x8c2 NEON MULL LD2R: full 1x8 tile for k = 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2025 
// 1x8c2 NEON MULL LD2R: k = 9..15, n = 1..8, m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2045 
// 1x8c2 NEON MULL LD2R: full 1x8 tile for k = 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2060 
// 1x8c2 NEON MULL LD2R: k = 16..80 in steps of 8, n = 1..8, m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2080 
// 1x8c2 NEON MULL LD2R: n = 9..15, k = 1, 10, 19, 28, 37, with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2097 
// 1x8c2 NEON MULL LD2R: n = 9..15, k = 1, 10, 19, 28, 37, m = 1, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2115 
// 1x8c2 NEON MULL LD2R: n = 9..15, k = 1, 10, 19, 28, 37, m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2135 
// 1x8c2 NEON MULL LD2R: n = 16 and 24, k = 1, 10, 19, 28, 37, with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2152 
// 1x8c2 NEON MULL LD2R: n = 16 and 24, k = 1, 10, 19, 28, 37, m = 1, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2170 
// 1x8c2 NEON MULL LD2R: n = 16 and 24, k = 1, 10, 19, 28, 37, m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2190 
// 1x8c2 NEON MULL LD2R: full 1x8 tile with ks(3), k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2206 
// 1x8c2 NEON MULL LD2R: ks(3) with k = 1, 10, 19, 28, 37, n = 1..8, m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2227 
// 1x8c2 NEON MULL LD2R: ks(3) with n = 9..15, k = 1, 10, 19, 28, 37, m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2245 
// 1x8c2 NEON MULL LD2R: ks(3) with n = 16 and 24, k = 1, 10, 19, 28, 37, m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2263 
// 1x8c2 NEON MULL LD2R: cm_stride(11) with k = 1, 10, 19, 28, 37, n = 1..8, m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2284 
// 1x8c2 NEON MULL LD2R: full 1x8 tile with ks(3) and a_offset(43), k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2301 
// 1x8c2 NEON MULL LD2R: ks(3), a_offset(43) and zero_index = 0 (single-value loop), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t mz_val = 0; mz_val < 1; ++mz_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_val)
        .ks(3)
        .a_offset(43)
        .zero_index(mz_val)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2321 
// 1x8c2 NEON MULL LD2R: single 1x8 run at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
2335 
// 1x8c2 NEON MULL LD2R: single 1x8 run at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
2349 
// 1x8c2 NEON MULL LD2R: single 1x8 run at k = 8 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
2363 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2364 
2365 
2366 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8c2 NEON MLAL LD2R: single 1x8 run at k = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
2379 
// 1x8c2 NEON MLAL LD2R: single 1x8 run at k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
2393 
// 1x8c2 NEON MLAL LD2R: k = 16 with n = 1..8 and m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2411 
// 1x8c2 NEON MLAL LD2R: k = 16, n = 8, m = 1 (single-value loop); iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2427 
// 1x8c2 NEON MLAL LD2R: k = 16, m = 1, n = 1..8; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_val).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2443 
// 1x8c2 NEON MLAL LD2R: full 1x8 tile for k = 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2458 
// 1x8c2 NEON MLAL LD2R: k = 1..15, n = 1..8, m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2478 
// 1x8c2 NEON MLAL LD2R: full 1x8 tile for k = 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2493 
// 1x8c2 NEON MLAL LD2R: k = 17..31, n = 1..8, m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2513 
// 1x8c2 NEON MLAL LD2R: full 1x8 tile for k = 32..160 in steps of 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
2528 
// 1x8c2 NEON MLAL LD2R: k = 32..160 in steps of 16, n = 1..8, m = 1 (single-value loop); iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2548 
// 1x8c2 NEON MLAL LD2R: n = 9..15, k = 1, 18, 35, 52, 69, with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
2565 
// For n = 9..15 and k = 1..80 in steps of 17, with m = 1 and cn_stride = 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
2583 
// For n = 9..15 and k = 1..80 in steps of 17: every m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2603 
// For n = 16 and 24: sweeps k = 1..80 in steps of 17 with m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
2620 
// For n = 16 and 24 and k = 1..80 in steps of 17, with m = 1 and cn_stride = 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
2638 
// For n = 16 and 24 and k = 1..80 in steps of 17: every m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2658 
// Sweeps k = 1..80 in steps of 17 with m = 1, n = 8 and ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2674 
// For k = 1..80 in steps of 17: every n in [1, 8] and m in [1, 1], with ks = 3 and one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2695 
// For n = 9..15 and k = 1..80 in steps of 17, with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
2713 
// For n = 16 and 24 and k = 1..80 in steps of 17, with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
2731 
// For k = 1..80 in steps of 17: every n in [1, 8] and m in [1, 1], with cm_stride = 11 and one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2752 
// Sweeps k = 1..80 in steps of 17 with m = 1, n = 8, ks = 3 and a_offset = 83.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2769 
// For k = 1..80 in steps of 17 and zero_index in [0, 1): m = 1, n = 8, ks = 3, a_offset = 83.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
2789 
// Single run with m = 1, n = 8, k = 16 and qmin = 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
2803 
// Single run with m = 1, n = 8, k = 16 and qmax = 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
2817 
// Single run with m = 1, n = 8, k = 16 and cm_stride = 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
2831 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2832 
2833 
2834 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 1, n = 16 and k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(8)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
2847 
// Single run with m = 1, n = 16, k = 8 and cn_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
2861 
// With k = 8: every n in [1, 16] and m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
2879 
// With n = 16 and k = 8: every m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(mc).n(16).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2895 
// With m = 1 and k = 8: every n in [1, 16], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2911 
// Sweeps k = 1..7 with m = 1 and n = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2926 
// For k = 1..7: every n in [1, 16] and m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2946 
// Sweeps k = 9..15 with m = 1 and n = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2961 
// For k = 9..15: every n in [1, 16] and m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2981 
// Sweeps k = 16, 24, ..., 80 (step 8) with m = 1 and n = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
2996 
// For k = 16, 24, ..., 80: every n in [1, 16] and m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3016 
// For n = 17..31: sweeps k = 1..40 in steps of 9 with m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
3033 
// For n = 17..31 and k = 1..40 in steps of 9, with m = 1 and cn_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
3051 
// For n = 17..31 and k = 1..40 in steps of 9: every m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3071 
// For n = 32 and 48: sweeps k = 1..40 in steps of 9 with m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
3088 
// For n = 32 and 48 and k = 1..40 in steps of 9, with m = 1 and cn_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
3106 
// For n = 32 and 48 and k = 1..40 in steps of 9: every m in [1, 1], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3126 
// Sweeps k = 1..40 in steps of 9 with m = 1, n = 16 and ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
3142 
// For k = 1..40 in steps of 9: every n in [1, 16] and m in [1, 1], with ks = 3 and one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3163 
// For n = 17..31 and k = 1..40 in steps of 9, with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
3181 
// For n = 32 and 48 and k = 1..40 in steps of 9, with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
3199 
// For k = 1..40 in steps of 9: every n in [1, 16] and m in [1, 1], with cm_stride = 19 and one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3220 
// Sweeps k = 1..40 in steps of 9 with m = 1, n = 16, ks = 3 and a_offset = 43.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(kc)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
3237 
// For k = 1..40 in steps of 9 and zero_index in [0, 1): m = 1, n = 16, ks = 3, a_offset = 43.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(16).k(kc)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
3257 
// Single run with m = 1, n = 16, k = 8 and qmin = 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
3271 
// Single run with m = 1, n = 16, k = 8 and qmax = 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
3285 
// Single run with m = 1, n = 16, k = 8 and cm_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
3299 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3300 
3301 
3302 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 2, n = 16 and k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(8)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
3315 
// Single run with m = 2, n = 16, k = 8 and cn_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
3329 
// With k = 8: every n in [1, 16] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
3347 
// With n = 16 and k = 8: every m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(mc).n(16).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
3363 
// With m = 2 and k = 8: every n in [1, 16], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
3379 
// Sweeps k = 1..7 with m = 2 and n = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
3394 
// For k = 1..7: every n in [1, 16] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3414 
// Exercises the 2x16c2 neon_mull_ld4r kernel for every k in [9, 15] with m=2, n=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(2).n(16).k(cur_k);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3429 
// Exercises the 2x16c2 neon_mull_ld4r kernel for k in [9, 15], n in [1, 16] and m in [1, 2], iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3449 
// Exercises the 2x16c2 neon_mull_ld4r kernel for k = 16, 24, ..., 80 with m=2, n=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(2).n(16).k(cur_k);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3464 
// Exercises the 2x16c2 neon_mull_ld4r kernel for k = 16, 24, ..., 80, n in [1, 16] and m in [1, 2], iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3484 
// Exercises the 2x16c2 neon_mull_ld4r kernel for n in [17, 31] and k = 1, 10, ..., 37 with m=2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
3501 
// Exercises the 2x16c2 neon_mull_ld4r kernel for n in [17, 31] and k = 1, 10, ..., 37 with m=2 and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k).cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
3519 
// Exercises the 2x16c2 neon_mull_ld4r kernel for n in [17, 31], k = 1, 10, ..., 37 and m in [1, 2], iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3539 
// Exercises the 2x16c2 neon_mull_ld4r kernel for n = 32, 48 and k = 1, 10, ..., 37 with m=2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
3556 
// Exercises the 2x16c2 neon_mull_ld4r kernel for n = 32, 48 and k = 1, 10, ..., 37 with m=2 and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k).cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
3574 
// Exercises the 2x16c2 neon_mull_ld4r kernel for n = 32, 48, k = 1, 10, ..., 37 and m in [1, 2], iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3594 
// Exercises the 2x16c2 neon_mull_ld4r kernel for k = 1, 10, ..., 37 with m=2, n=16 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(2).n(16).k(cur_k).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3610 
// Exercises the 2x16c2 neon_mull_ld4r kernel for k = 1, 10, ..., 37, n in [1, 16] and m in [1, 2] with ks set to 3 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3631 
// Exercises the 2x16c2 neon_mull_ld4r kernel for n in [17, 31] and k = 1, 10, ..., 37 with m=2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
3649 
// Exercises the 2x16c2 neon_mull_ld4r kernel for n = 32, 48 and k = 1, 10, ..., 37 with m=2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(cur_n).k(cur_k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
3667 
// Exercises the 2x16c2 neon_mull_ld4r kernel for k = 1, 10, ..., 37, n in [1, 16] and m in [1, 2] with cm_stride set to 19 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).cm_stride(19).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3688 
// Exercises the 2x16c2 neon_mull_ld4r kernel for k = 1, 10, ..., 37 with m=2, n=16, ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(2).n(16).k(cur_k).ks(3).a_offset(83);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3705 
// Exercises the 2x16c2 neon_mull_ld4r kernel for k = 1, 10, ..., 37 and zero_index in [0, 1] with m=2, n=16, ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 2; ++cur_mz) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(16).k(cur_k).ks(3).a_offset(83).zero_index(cur_mz);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
3725 
// Exercises the 2x16c2 neon_mull_ld4r kernel at m=2, n=16, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(2).sr(1);
  tester.m(2).n(16).k(8).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
3739 
// Exercises the 2x16c2 neon_mull_ld4r kernel at m=2, n=16, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(2).sr(1);
  tester.m(2).n(16).k(8).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
3753 
// Exercises the 2x16c2 neon_mull_ld4r kernel at m=2, n=16, k=8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(2).sr(1);
  tester.m(2).n(16).k(8).cm_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
3767 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3768 
3769 
3770 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the 3x16c2 neon_mull_ld4r kernel at m=3, n=16, k=8.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(2).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
3783 
// Exercises the 3x16c2 neon_mull_ld4r kernel at m=3, n=16, k=8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(2).sr(1);
  tester.m(3).n(16).k(8).cn_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
3797 
// Exercises the 3x16c2 neon_mull_ld4r kernel for n in [1, 16] and m in [1, 3] with k=8 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(cur_m).n(cur_n).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
3815 
// Exercises the 3x16c2 neon_mull_ld4r kernel for every m in [1, 3] with n=16, k=8 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(cur_m).n(16).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3831 
// Exercises the 3x16c2 neon_mull_ld4r kernel for every n in [1, 16] with m=3, k=8 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(cur_n).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3847 
// Exercises the 3x16c2 neon_mull_ld4r kernel for every k in [1, 7] with m=3, n=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(16).k(cur_k);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3862 
// Exercises the 3x16c2 neon_mull_ld4r kernel for k in [1, 7], n in [1, 16] and m in [1, 3], iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3882 
// Exercises the 3x16c2 neon_mull_ld4r kernel for every k in [9, 15] with m=3, n=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(16).k(cur_k);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3897 
// Exercises the 3x16c2 neon_mull_ld4r kernel for k in [9, 15], n in [1, 16] and m in [1, 3], iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3917 
// Exercises the 3x16c2 neon_mull_ld4r kernel for k = 16, 24, ..., 80 with m=3, n=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(16).k(cur_k);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
3932 
// Exercises the 3x16c2 neon_mull_ld4r kernel for k = 16, 24, ..., 80, n in [1, 16] and m in [1, 3], iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
3952 
// Exercises the 3x16c2 neon_mull_ld4r kernel for n in [17, 31] and k = 1, 10, ..., 37 with m=3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(cur_n).k(cur_k);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
3969 
// Exercises the 3x16c2 neon_mull_ld4r kernel for n in [17, 31] and k = 1, 10, ..., 37 with m=3 and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(cur_n).k(cur_k).cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
3987 
// Exercises the 3x16c2 neon_mull_ld4r kernel for n in [17, 31], k = 1, 10, ..., 37 and m in [1, 3], iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4007 
// Exercises the 3x16c2 neon_mull_ld4r kernel for n = 32, 48 and k = 1, 10, ..., 37 with m=3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(cur_n).k(cur_k);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
4024 
// Exercises the 3x16c2 neon_mull_ld4r kernel for n = 32, 48 and k = 1, 10, ..., 37 with m=3 and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(cur_n).k(cur_k).cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
4042 
// Exercises the 3x16c2 neon_mull_ld4r kernel for n = 32, 48, k = 1, 10, ..., 37 and m in [1, 3], iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4062 
// Exercises the 3x16c2 neon_mull_ld4r kernel for k = 1, 10, ..., 37 with m=3, n=16 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(16).k(cur_k).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
4078 
// Exercises the 3x16c2 neon_mull_ld4r kernel for k = 1, 10, ..., 37, n in [1, 16] and m in [1, 3] with ks set to 3 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4099 
// Exercises the 3x16c2 neon_mull_ld4r kernel for n in [17, 31] and k = 1, 10, ..., 37 with m=3 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(cur_n).k(cur_k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
4117 
// Exercises the 3x16c2 neon_mull_ld4r kernel for n = 32, 48 and k = 1, 10, ..., 37 with m=3 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(cur_n).k(cur_k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
4135 
// Exercises the 3x16c2 neon_mull_ld4r kernel for k = 1, 10, ..., 37, n in [1, 16] and m in [1, 3] with cm_stride set to 19 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k).cm_stride(19).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4156 
// Exercises the 3x16c2 neon_mull_ld4r kernel for k = 1, 10, ..., 37 with m=3, n=16, ks set to 3 and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(16).k(cur_k).ks(3).a_offset(127);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
4173 
// Exercises the 3x16c2 neon_mull_ld4r kernel for k = 1, 10, ..., 37 and zero_index in [0, 2] with m=3, n=16, ks set to 3 and a_offset set to 127.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 3; ++cur_mz) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(16).k(cur_k).ks(3).a_offset(127).zero_index(cur_mz);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
4193 
// Exercises the 3x16c2 neon_mull_ld4r kernel at m=3, n=16, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(2).sr(1);
  tester.m(3).n(16).k(8).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
4207 
// Exercises the 3x16c2 neon_mull_ld4r kernel at m=3, n=16, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(2).sr(1);
  tester.m(3).n(16).k(8).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
4221 
// Exercises the 3x16c2 neon_mull_ld4r kernel at m=3, n=16, k=8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(2).sr(1);
  tester.m(3).n(16).k(8).cm_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
4235 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4236 
4237 
4238 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the 4x16c2 neon_mull_ld4r kernel at m=4, n=16, k=8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(2).sr(1);
  tester.m(4).n(16).k(8);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
4251 
// Exercises the 4x16c2 neon_mull_ld4r kernel at m=4, n=16, k=8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(2).sr(1);
  tester.m(4).n(16).k(8).cn_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
4265 
// Exercises the 4x16c2 neon_mull_ld4r kernel for n in [1, 16] and m in [1, 4] with k=8 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(2).sr(1);
      tester.m(cur_m).n(cur_n).k(8).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
4283 
// n=16, k=8; sweeps m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(m_val).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4299 
// m=4, k=8; sweeps n over 1..16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(n_val).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4315 
// m=4, n=16; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(16).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4330 
// Sweeps k over 1..7, n over 1..16 and m over 1..4 (nested in that order),
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4350 
// m=4, n=16; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(16).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4365 
// Sweeps k over 9..15, n over 1..16 and m over 1..4 (nested in that order),
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4385 
// m=4, n=16; sweeps k over 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(16).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4400 
// Sweeps k over 16..80 (step 8), n over 1..16 and m over 1..4 (nested in that
// order), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4420 
// m=4; sweeps n over 17..31 (outer) and k over 1..40 in steps of 9 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4437 
// m=4, cn_stride(19); sweeps n over 17..31 (outer) and k over 1..40 in steps
// of 9 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4455 
// Sweeps n over 17..31, k over 1..40 (step 9) and m over 1..4 (nested in that
// order), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4475 
// m=4; sweeps n over 32..48 in steps of 16 (outer) and k over 1..40 in steps
// of 9 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4492 
// m=4, cn_stride(19); sweeps n over 32..48 in steps of 16 (outer) and k over
// 1..40 in steps of 9 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4510 
// Sweeps n over 32..48 (step 16), k over 1..40 (step 9) and m over 1..4
// (nested in that order), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4530 
// m=4, n=16, ks(3); sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(16).k(k_val)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4546 
// ks(3); sweeps k over 1..40 (step 9), n over 1..16 and m over 1..4 (nested in
// that order), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4567 
// m=4, ks(3); sweeps n over 17..31 (outer) and k over 1..40 in steps of 9
// (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4585 
// m=4, ks(3); sweeps n over 32..48 in steps of 16 (outer) and k over 1..40 in
// steps of 9 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(4).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4603 
// cm_stride(19); sweeps k over 1..40 (step 9), n over 1..16 and m over 1..4
// (nested in that order), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4624 
// m=4, n=16, ks(3), a_offset(163); sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(16).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4641 
// m=4, n=16, ks(3), a_offset(163); sweeps k over 1..40 in steps of 9 (outer)
// and zero_index over 0..3 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(4).n(16).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4661 
// Single run: m=4, n=16, k=8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
4675 
// Single run: m=4, n=16, k=8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
4689 
// Single run: m=4, n=16, k=8 with cm_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
4703 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
4704 
4705 
4706 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 1x16c2 MLAL LD4R kernel: m=1, n=16, k=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(16)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
4719 
// Single run: m=1, n=16, k=16 with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(16)
      .cn_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
4733 
// k=16; sweeps n over 1..16 (outer) and m over 1..1 (inner), one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4751 
// n=16, k=16; sweeps m over 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(m_val).n(16).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4767 
// m=1, k=16; sweeps n over 1..16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(n_val).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4783 
// m=1, n=16; sweeps k over 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4798 
// Sweeps k over 1..15, n over 1..16 and m over 1..1 (nested in that order),
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4818 
// m=1, n=16; sweeps k over 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4833 
// Sweeps k over 17..31, n over 1..16 and m over 1..1 (nested in that order),
// one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4853 
// m=1, n=16; sweeps k over 32..160 in steps of 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
4868 
// Sweeps k over 32..160 (step 16), n over 1..16 and m over 1..1 (nested in
// that order), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4888 
// m=1; sweeps n over 17..31 (outer) and k over 1..80 in steps of 17 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4905 
// m=1, cn_stride(19); sweeps n over 17..31 (outer) and k over 1..80 in steps
// of 17 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4923 
// Sweeps n over 17..31, k over 1..80 (step 17) and m over 1..1 (nested in that
// order), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4943 
// m=1; sweeps n over 32..48 in steps of 16 (outer) and k over 1..80 in steps
// of 17 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4960 
// m=1, cn_stride(19); sweeps n over 32..48 in steps of 16 (outer) and k over
// 1..80 in steps of 17 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
4978 
// Sweeps n over 32..48 (step 16), k over 1..80 (step 17) and m over 1..1
// (nested in that order), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
4998 
// m=1, n=16, ks(3); sweeps k over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(k_val)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
5014 
// ks(3); sweeps k over 1..80 (step 17), n over 1..16 and m over 1..1 (nested
// in that order), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5035 
// m=1, ks(3); sweeps n over 17..31 (outer) and k over 1..80 in steps of 17
// (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
5053 
// m=1, ks(3); sweeps n over 32..48 in steps of 16 (outer) and k over 1..80 in
// steps of 17 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
5071 
// cm_stride(19); sweeps k over 1..80 (step 17), n over 1..16 and m over 1..1
// (nested in that order), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5092 
// m=1, n=16, ks(3), a_offset(83); sweeps k over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
5109 
// m=1, n=16, ks(3), a_offset(83); sweeps k over 1..80 in steps of 17 (outer)
// and zero_index over 0..0 (inner).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(1).n(16).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
5129 
// Single full-tile run (m = 1, n = 16, k = 16) with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(2).sr(1);
  tester.m(1).n(16).k(16);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
5143 
// Single full-tile run (m = 1, n = 16, k = 16) with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(2).sr(1);
  tester.m(1).n(16).k(16);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
5157 
// Single full-tile run (m = 1, n = 16, k = 16) with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(16).kr(2).sr(1);
  tester.m(1).n(16).k(16);
  tester.cm_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
5171 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5172 
5173 
5174 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run on the full tile: m = 2, n = 16, k = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(16).kr(2).sr(1);
  tester.m(2).n(16).k(16);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
5187 
// Single full-tile run (m = 2, n = 16, k = 16) with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(16).kr(2).sr(1);
  tester.m(2).n(16).k(16);
  tester.cn_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
5201 
// k = 16 for every (n, m) pair with n in 1..16 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(mc).n(nc).k(16);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5219 
// k = 16 and n = 16 with m swept over 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(mc).n(16).k(16);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5235 
// k = 16 and m = 2 with n swept over 1..16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(2).n(nc).k(16);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5251 
// Full tile (m = 2, n = 16) with k swept over 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(2).n(16).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5266 
// k in 1..15 crossed with n in 1..16 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5286 
// Full tile (m = 2, n = 16) with k swept over 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(2).n(16).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5301 
// k in 17..31 crossed with n in 1..16 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5321 
// Full tile (m = 2, n = 16) with k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(2).n(16).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5336 
// k = 32, 48, ..., 160 crossed with n in 1..16 and m in 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5356 
// m = 2 with n swept over 17..31 and k over {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5373 
// m = 2 with n swept over 17..31 and k over {1, 18, 35, 52, 69}, cn_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5391 
// n in 17..31 crossed with k in {1, 18, 35, 52, 69} and m in 1..2, one
// iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5411 
// m = 2 with n in {32, 48} and k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5428 
// m = 2 with n in {32, 48} and k in {1, 18, 35, 52, 69}, cn_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5446 
// n in {32, 48} crossed with k in {1, 18, 35, 52, 69} and m in 1..2, one
// iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5466 
// Full tile (m = 2, n = 16) with ks = 3, for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(2).n(16).k(kc);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5482 
// ks = 3 with k in {1, 18, 35, 52, 69}, n in 1..16 and m in 1..2, one
// iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5503 
// m = 2 and ks = 3 with n swept over 17..31 and k over {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5521 
// m = 2 and ks = 3 with n in {32, 48} and k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5539 
// cm_stride = 19 with k in {1, 18, 35, 52, 69}, n in 1..16 and m in 1..2, one
// iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(19).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5560 
// Full tile (m = 2, n = 16) with ks = 3 and a_offset = 163, for k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(16).kr(2).sr(1);
    tester.m(2).n(16).k(kc);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5577 
// Full tile (m = 2, n = 16) with ks = 3 and a_offset = 163; zero_index takes
// 0 and 1 for each k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(16).kr(2).sr(1);
      tester.m(2).n(16).k(kc);
      tester.ks(3).a_offset(163).zero_index(zi);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5597 
// Single full-tile run (m = 2, n = 16, k = 16) with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(16).kr(2).sr(1);
  tester.m(2).n(16).k(16);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
5611 
// Single full-tile run (m = 2, n = 16, k = 16) with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(16).kr(2).sr(1);
  tester.m(2).n(16).k(16);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
5625 
// Single full-tile run (m = 2, n = 16, k = 16) with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(16).kr(2).sr(1);
  tester.m(2).n(16).k(16);
  tester.cm_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
5639 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5640 
5641 
5642 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run on the full tile: m = 3, n = 16, k = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(2).sr(1);
  tester.m(3).n(16).k(16);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
5655 
// Single full-tile run (m = 3, n = 16, k = 16) with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(2).sr(1);
  tester.m(3).n(16).k(16);
  tester.cn_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
5669 
// k = 16 for every (n, m) pair with n in 1..16 and m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(mc).n(nc).k(16);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5687 
// k = 16 and n = 16 with m swept over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(mc).n(16).k(16);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5703 
// k = 16 and m = 3 with n swept over 1..16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(nc).k(16);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5719 
// Full tile (m = 3, n = 16) with k swept over 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5734 
// k in 1..15 crossed with n in 1..16 and m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5754 
// Full tile (m = 3, n = 16) with k swept over 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5769 
// k in 17..31 crossed with n in 1..16 and m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5789 
// Full tile (m = 3, n = 16) with k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(16).k(kc);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5804 
// k = 32, 48, ..., 160 crossed with n in 1..16 and m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5824 
// m = 3 with n swept over 17..31 and k over {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5841 
// m = 3 with n swept over 17..31 and k over {1, 18, 35, 52, 69}, cn_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5859 
// n in 17..31 crossed with k in {1, 18, 35, 52, 69} and m in 1..3, one
// iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5879 
// m = 3 with n in {32, 48} and k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5896 
// m = 3 with n in {32, 48} and k in {1, 18, 35, 52, 69}, cn_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5914 
// n in {32, 48} crossed with k in {1, 18, 35, 52, 69} and m in 1..3, one
// iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5934 
// Full tile (m = 3, n = 16) with ks = 3, for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(16).k(kc);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
5950 
// ks = 3 with k in {1, 18, 35, 52, 69}, n in 1..16 and m in 1..3, one
// iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
5971 
// m = 3 and ks = 3 with n swept over 17..31 and k over {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
5989 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 with m = 3 and n = 32, 48; k takes the values 1, 18, 35, 52, 69.
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6007 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride = 19 for every (m, n) with m in [1, 3] and n in [1, 16], at k = 1, 18, 35, 52, 69;
  // each configuration is run with iterations(1).
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(19).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6028 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Full 3x16 tile with ks = 3 and a_offset = 251; k takes the values 1, 18, 35, 52, 69.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(2).sr(1);
    tester.m(3).n(16).k(depth).ks(3).a_offset(251);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6045 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Full 3x16 tile with ks = 3 and a_offset = 251; zero_index runs over 0, 1, 2
  // for each k in 1, 18, 35, 52, 69.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(2).sr(1);
      tester.m(3).n(16).k(depth).ks(3).a_offset(251).zero_index(zero_row);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6065 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on a full 3x16 tile at k = 16 with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(2).sr(1);
  tester.m(3).n(16).k(16).qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
6079 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on a full 3x16 tile at k = 16 with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(2).sr(1);
  tester.m(3).n(16).k(16).qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
6093 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on a full 3x16 tile at k = 16 with cm_stride set to 19.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(2).sr(1);
  tester.m(3).n(16).k(16).cm_stride(19);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
6107 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6108 
6109 
6110 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on a full 4x16 tile at k = 16.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(2).sr(1);
  tester.m(4).n(16).k(16);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
6123 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on a full 4x16 tile at k = 16 with cn_stride set to 19.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(2).sr(1);
  tester.m(4).n(16).k(16).cn_stride(19);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
6137 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16 for every (m, n) with m in [1, 4] and n in [1, 16]; iterations(1) per configuration.
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(2).sr(1);
      tester.m(rows).n(cols).k(16).iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6155 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16 and n = 16 while m runs over [1, 4]; iterations(1) per configuration.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(2).sr(1);
    tester.m(rows).n(16).k(16).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6171 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16 and m = 4 while n runs over [1, 16]; iterations(1) per configuration.
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(2).sr(1);
    tester.m(4).n(cols).k(16).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6187 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 tile for every k in [1, 15].
  for (size_t depth = 1; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(2).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6202 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every k in [1, 15] combined with every m in [1, 4] and n in [1, 16];
  // iterations(1) per configuration.
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6222 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 tile for every k in [17, 31].
  for (size_t depth = 17; depth < 32; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(2).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6237 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every k in [17, 31] combined with every m in [1, 4] and n in [1, 16];
  // iterations(1) per configuration.
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6257 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 tile for k = 32, 48, ..., 160 (step 16).
  for (size_t depth = 32; depth <= 160; depth += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(2).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6272 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 (step 16) combined with every m in [1, 4] and n in [1, 16];
  // iterations(1) per configuration.
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6292 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4 with n in [17, 31]; k takes the values 1, 18, 35, 52, 69.
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6309 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4, cn_stride = 19, n in [17, 31]; k takes the values 1, 18, 35, 52, 69.
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(2).sr(1);
      tester.m(4).n(cols).k(depth).cn_stride(19);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6327 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in [17, 31], k = 1, 18, 35, 52, 69 and m in [1, 4]; iterations(1) per configuration.
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6347 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4 with n = 32, 48; k takes the values 1, 18, 35, 52, 69.
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6364 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m = 4, cn_stride = 19, n = 32, 48; k takes the values 1, 18, 35, 52, 69.
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(2).sr(1);
      tester.m(4).n(cols).k(depth).cn_stride(19);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6382 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32, 48, k = 1, 18, 35, 52, 69 and m in [1, 4]; iterations(1) per configuration.
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6402 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 tile with ks = 3; k takes the values 1, 18, 35, 52, 69.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(2).sr(1);
    tester.m(4).n(16).k(depth).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6418 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 for every (m, n) with m in [1, 4] and n in [1, 16], at k = 1, 18, 35, 52, 69;
  // each configuration is run with iterations(1).
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6439 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 with m = 4 and n in [17, 31]; k takes the values 1, 18, 35, 52, 69.
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(2).sr(1);
      tester.m(4).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6457 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 with m = 4 and n = 32, 48; k takes the values 1, 18, 35, 52, 69.
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(2).sr(1);
      tester.m(4).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6475 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride = 19 for every (m, n) with m in [1, 4] and n in [1, 16], at k = 1, 18, 35, 52, 69;
  // each configuration is run with iterations(1).
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(19).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6496 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 tile with ks = 3 and a_offset = 331; k takes the values 1, 18, 35, 52, 69.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(2).sr(1);
    tester.m(4).n(16).k(depth).ks(3).a_offset(331);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6513 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Full 4x16 tile with ks = 3 and a_offset = 331; zero_index runs over 0, 1, 2, 3
  // for each k in 1, 18, 35, 52, 69.
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(2).sr(1);
      tester.m(4).n(16).k(depth).ks(3).a_offset(331).zero_index(zero_row);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6533 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on a full 4x16 tile at k = 16 with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(2).sr(1);
  tester.m(4).n(16).k(16).qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
6547 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on a full 4x16 tile at k = 16 with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(2).sr(1);
  tester.m(4).n(16).k(16).qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
6561 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on a full 4x16 tile at k = 16 with cm_stride set to 19.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(2).sr(1);
  tester.m(4).n(16).k(16).cm_stride(19);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
6575 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6576 
6577 
6578 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on a full 3x8 tile at k = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(4).sr(2);
  tester.m(3).n(8).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
6591 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single run on a full 3x8 tile at k = 8 with cn_stride set to 11.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(8).kr(4).sr(2);
  tester.m(3).n(8).k(8).cn_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
6605 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8 for every (m, n) with m in [1, 3] and n in [1, 8]; iterations(1) per configuration.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(4).sr(2);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6623 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8 and n = 8 while m runs over [1, 3]; iterations(1) per configuration.
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(4).sr(2);
    tester.m(rows).n(8).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6639 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8 and m = 3 while n runs over [1, 8]; iterations(1) per configuration.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(4).sr(2);
    tester.m(3).n(cols).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6655 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Full 3x8 tile for every k in [1, 7].
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(4).sr(2);
    tester.m(3).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6670 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every k in [1, 7] combined with every m in [1, 3] and n in [1, 8];
  // iterations(1) per configuration.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6690 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // Full 3x8 tile for every k in [9, 15].
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(4).sr(2);
    tester.m(3).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6705 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every k in [9, 15] combined with every m in [1, 3] and n in [1, 8];
  // iterations(1) per configuration.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6725 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // Full 3x8 tile for k = 16, 24, ..., 80 (step 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(8).kr(4).sr(2);
    tester.m(3).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
6740 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16, 24, ..., 80 (step 8) combined with every m in [1, 3] and n in [1, 8];
  // iterations(1) per configuration.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6760 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3 with n in [9, 15]; k takes the values 1, 10, 19, 28, 37.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(4).sr(2);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6777 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3, cn_stride = 11, n in [9, 15]; k takes the values 1, 10, 19, 28, 37.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(4).sr(2);
      tester.m(3).n(cols).k(depth).cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6795 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in [9, 15], k = 1, 10, 19, 28, 37 and m in [1, 3]; iterations(1) per configuration.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(8).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6815 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3 with n = 16, 24; k takes the values 1, 10, 19, 28, 37.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(4).sr(2);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6832 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m = 3, cn_stride = 11, n = 16, 24; k takes the values 1, 10, 19, 28, 37.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(8).kr(4).sr(2);
      tester.m(3).n(cols).k(depth).cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
6850 
// 3x8c4s2 NEON MULL: n = 16 and 24, k = 1, 10, ..., 37, m = 1..3,
// with iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6870 
// 3x8c4s2 NEON MULL: full 3x8 tile with ks(3), k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(2)
        .m(3).n(8).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
6886 
// 3x8c4s2 NEON MULL: ks(3) with k = 1, 10, ..., 37, n = 1..8, m = 1..3,
// and iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6907 
// 3x8c4s2 NEON MULL: ks(3) with n = 9..15 and k = 1, 10, ..., 37 at m = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(2)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
6925 
// 3x8c4s2 NEON MULL: ks(3) with n = 16 and 24, k = 1, 10, ..., 37 at m = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(2)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
6943 
// 3x8c4s2 NEON MULL: cm_stride(11) with k = 1, 10, ..., 37, n = 1..8,
// m = 1..3, and iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
6964 
// 3x8c4s2 NEON MULL: full 3x8 tile with ks(3) and a_offset(127),
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(2)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
6981 
// 3x8c4s2 NEON MULL: ks(3), a_offset(127) and zero_index = 0..2,
// for k = 1, 10, ..., 37 on the full 3x8 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(2)
          .m(3).n(8).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7001 
// 3x8c4s2 NEON MULL: single 3x8 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(2)
      .m(3).n(8).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
7015 
// 3x8c4s2 NEON MULL: single 3x8 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(2)
      .m(3).n(8).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
7029 
// 3x8c4s2 NEON MULL: single 3x8 tile at k = 8 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(2)
      .m(3).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
7043 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7044 
7045 
7046 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x8c4s2 NEON MULL: single full 4x8 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(2)
      .m(4).n(8).k(8)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
7059 
// 4x8c4s2 NEON MULL: single full 4x8 tile at k = 8 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(2)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
7073 
// 4x8c4s2 NEON MULL: k = 8 with n = 1..8 and m = 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7091 
// 4x8c4s2 NEON MULL: k = 8, n = 8, with m = 1..4 and iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(2)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7107 
// 4x8c4s2 NEON MULL: k = 8, m = 4, with n = 1..8 and iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(2)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7123 
// 4x8c4s2 NEON MULL: full 4x8 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(2)
        .m(4).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7138 
// 4x8c4s2 NEON MULL: k = 1..7, n = 1..8, m = 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7158 
// 4x8c4s2 NEON MULL: full 4x8 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(2)
        .m(4).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7173 
// 4x8c4s2 NEON MULL: k = 9..15, n = 1..8, m = 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7193 
// 4x8c4s2 NEON MULL: full 4x8 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(2)
        .m(4).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7208 
// 4x8c4s2 NEON MULL: k = 16, 24, ..., 80, n = 1..8, m = 1..4,
// with iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7228 
// 4x8c4s2 NEON MULL: m = 4 with n = 9..15 and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(2)
          .m(4).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7245 
// 4x8c4s2 NEON MULL: m = 4 with n = 9..15, k = 1, 10, ..., 37 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(2)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7263 
// 4x8c4s2 NEON MULL: n = 9..15, k = 1, 10, ..., 37, m = 1..4,
// with iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7283 
// 4x8c4s2 NEON MULL: m = 4 with n = 16 and 24, k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(2)
          .m(4).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7300 
// 4x8c4s2 NEON MULL: m = 4 with n = 16 and 24, k = 1, 10, ..., 37 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(2)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7318 
// 4x8c4s2 NEON MULL: n = 16 and 24, k = 1, 10, ..., 37, m = 1..4,
// with iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7338 
// 4x8c4s2 NEON MULL: full 4x8 tile with ks(3), k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(2)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7354 
// 4x8c4s2 NEON MULL: ks(3) with k = 1, 10, ..., 37, n = 1..8, m = 1..4,
// and iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7375 
// 4x8c4s2 NEON MULL: ks(3) with n = 9..15 and k = 1, 10, ..., 37 at m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(2)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7393 
// 4x8c4s2 NEON MULL: ks(3) with n = 16 and 24, k = 1, 10, ..., 37 at m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(2)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7411 
// 4x8c4s2 NEON MULL: cm_stride(11) with k = 1, 10, ..., 37, n = 1..8,
// m = 1..4, and iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7432 
// 4x8c4s2 NEON MULL: full 4x8 tile with ks(3) and a_offset(163),
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(2)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7449 
// 4x8c4s2 NEON MULL: ks(3), a_offset(163) and zero_index = 0..3,
// for k = 1, 10, ..., 37 on the full 4x8 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(2)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7469 
// 4x8c4s2 NEON MULL: single 4x8 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(2)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
7483 
// 4x8c4s2 NEON MULL: single 4x8 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(2)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
7497 
// 4x8c4s2 NEON MULL: single 4x8 tile at k = 8 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(2)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
7511 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7512 
7513 
7514 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x16c4s2 NEON MULL: single full 2x16 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(2)
      .m(2).n(16).k(8)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
7527 
// 2x16c4s2 NEON MULL: single full 2x16 tile at k = 8 with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(2)
      .m(2).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
7541 
// 2x16c4s2 NEON MULL: k = 8 with n = 1..16 and m = 1..2, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(2)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7559 
// 2x16c4s2 NEON MULL: k = 8, n = 16, with m = 1..2 and iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(2)
        .m(mc).n(16).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7575 
// 2x16c4s2 NEON MULL: k = 8, m = 2, with n = 1..16 and iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(2)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7591 
// 2x16c4s2 NEON MULL: full 2x16 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(2)
        .m(2).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7606 
// 2x16c4s2 NEON MULL: k = 1..7, n = 1..16, m = 1..2, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7626 
// 2x16c4s2 NEON MULL: full 2x16 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(2)
        .m(2).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7641 
// 2x16c4s2 NEON MULL: k = 9..15, n = 1..16, m = 1..2, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7661 
// 2x16c4s2 NEON MULL: full 2x16 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(2)
        .m(2).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
7676 
// 2x16c4s2 NEON MULL: k = 16, 24, ..., 80, n = 1..16, m = 1..2,
// with iterations(1) for every combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(4).sr(2)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7696 
// 2x16c4s2 NEON MULL: m = 2 with n = 17..31 and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(2)
          .m(2).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
7713 
// n_gt_16_strided_cn: sweeps n in [17, 31] with m=2 and
// k = 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
7731 
// n_gt_16_subtile: sweeps n in [17, 31], k = 1, 10, 19, 28, 37 and
// m in [1, 2], with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7751 
// n_div_16: runs n = 32 and n = 48 with m=2 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
7768 
// n_div_16_strided_cn: runs n = 32 and n = 48 with m=2 and
// k = 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
7786 
// n_div_16_subtile: runs n = 32 and n = 48, k = 1, 10, 19, 28, 37 and
// m in [1, 2], with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7806 
// small_kernel: runs m=2, n=16 with ks set to 3 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(4)
      .sr(2)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
7822 
// small_kernel_subtile: with ks set to 3, sweeps k = 1, 10, 19, 28, 37,
// n in [1, 16] and m in [1, 2], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7843 
// n_gt_16_small_kernel: with ks set to 3, sweeps n in [17, 31] with m=2
// and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
7861 
// n_div_16_small_kernel: with ks set to 3, runs n = 32 and n = 48 with m=2
// and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
7879 
// strided_cm_subtile: with cm_stride set to 19, sweeps k = 1, 10, 19, 28, 37,
// n in [1, 16] and m in [1, 2], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
7900 
// a_offset: runs m=2, n=16 with ks set to 3 and a_offset set to 83 for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(4)
      .sr(2)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
7917 
// zero: same setup as a_offset (ks=3, a_offset=83, m=2, n=16), additionally
// setting zero_index to each mz in [0, 1] for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(2)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
7937 
// qmin: single run with m=2, n=16, k=8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(4)
    .sr(2)
    .m(2)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
7951 
// qmax: single run with m=2, n=16, k=8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(4)
    .sr(2)
    .m(2)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
7965 
// strided_cm: single run with m=2, n=16, k=8 and cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(4)
    .sr(2)
    .m(2)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
7979 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7980 
7981 
7982 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: single run of the 3x16c4s2 NEON MULL kernel with m=3, n=16, k=8.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(4)
    .sr(2)
    .m(3)
    .n(16)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
7995 
// strided_cn: single run with m=3, n=16, k=8 and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(4)
    .sr(2)
    .m(3)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
8009 
// k_eq_8_subtile: with k=8, sweeps every n in [1, 16] and m in [1, 3],
// with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
8027 
// k_eq_8_subtile_m: with k=8 and n=16, sweeps m in [1, 3], with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(4)
      .sr(2)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8043 
// k_eq_8_subtile_n: with k=8 and m=3, sweeps n in [1, 16], with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(4)
      .sr(2)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8059 
// k_lt_8: runs m=3, n=16 for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(4)
      .sr(2)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8074 
// k_lt_8_subtile: sweeps k in [1, 7], n in [1, 16] and m in [1, 3],
// with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8094 
// k_gt_8: runs m=3, n=16 for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(4)
      .sr(2)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8109 
// k_gt_8_subtile: sweeps k in [9, 15], n in [1, 16] and m in [1, 3],
// with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8129 
// k_div_8: runs m=3, n=16 for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(4)
      .sr(2)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8144 
// k_div_8_subtile: for k = 16, 24, ..., 80, sweeps every n in [1, 16] and
// m in [1, 3], with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8164 
// n_gt_16: sweeps n in [17, 31] with m=3 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
8181 
// n_gt_16_strided_cn: sweeps n in [17, 31] with m=3 and
// k = 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
8199 
// n_gt_16_subtile: sweeps n in [17, 31], k = 1, 10, 19, 28, 37 and
// m in [1, 3], with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8219 
// n_div_16: runs n = 32 and n = 48 with m=3 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
8236 
// n_div_16_strided_cn: runs n = 32 and n = 48 with m=3 and
// k = 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
8254 
// n_div_16_subtile: runs n = 32 and n = 48, k = 1, 10, 19, 28, 37 and
// m in [1, 3], with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8274 
// small_kernel: runs m=3, n=16 with ks set to 3 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(4)
      .sr(2)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8290 
// small_kernel_subtile: with ks set to 3, sweeps k = 1, 10, 19, 28, 37,
// n in [1, 16] and m in [1, 3], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8311 
// n_gt_16_small_kernel: with ks set to 3, sweeps n in [17, 31] with m=3
// and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
8329 
// n_div_16_small_kernel: with ks set to 3, runs n = 32 and n = 48 with m=3
// and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
8347 
// strided_cm_subtile: with cm_stride set to 19, sweeps k = 1, 10, 19, 28, 37,
// n in [1, 16] and m in [1, 3], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8368 
// a_offset: runs m=3, n=16 with ks set to 3 and a_offset set to 127 for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(4)
      .sr(2)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8385 
// zero: same setup as a_offset (ks=3, a_offset=127, m=3, n=16), additionally
// setting zero_index to each mz in [0, 2] for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(3)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
8405 
// qmin: single run with m=3, n=16, k=8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(4)
    .sr(2)
    .m(3)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
8419 
// qmax: single run with m=3, n=16, k=8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(4)
    .sr(2)
    .m(3)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
8433 
// strided_cm: single run with m=3, n=16, k=8 and cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(4)
    .sr(2)
    .m(3)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
8447 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8448 
8449 
8450 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: single run of the 3x8c2s4 NEON MLAL kernel with m=3, n=8, k=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(3)
    .n(8)
    .k(16)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
8463 
// strided_cn: single run with m=3, n=8, k=16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(3)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
8477 
// k_eq_16_subtile: with k=16, sweeps every n in [1, 8] and m in [1, 3],
// with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
8495 
// k_eq_16_subtile_m: with k=16 and n=8, sweeps m in [1, 3], with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8511 
// k_eq_16_subtile_n: with k=16 and m=3, sweeps n in [1, 8], with iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(3)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8527 
// k_lt_16: runs m=3, n=8 for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8542 
// k_lt_16_subtile: sweeps k in [1, 15], n in [1, 8] and m in [1, 3],
// with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8562 
// k_gt_16: runs m=3, n=8 for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
8577 
// k_gt_16_subtile: sweeps k in [17, 31], n in [1, 8] and m in [1, 3],
// with iterations set to 1 for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8597 
// Runs m=3, n=8 for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(4)
      .m(3).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
8612 
// For k = 32, 48, ..., 160, runs each (n, m) pair with n in 1..8 and m in 1..3,
// one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8632 
// Runs m=3 with n in 9..15, each against k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
8649 
// Runs m=3 with n in 9..15 and k = 1, 18, 35, 52, 69, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
8667 
// For n in 9..15 and k = 1, 18, 35, 52, 69, runs every m in 1..3,
// one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8687 
// Runs m=3 with n = 16 and 24, each against k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
8704 
// Runs m=3 with n = 16 and 24 and k = 1, 18, 35, 52, 69, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
8722 
// For n = 16 and 24 and k = 1, 18, 35, 52, 69, runs every m in 1..3,
// one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8742 
// Runs m=3, n=8 with ks set to 3 for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(4)
      .m(3).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
8758 
// With ks set to 3: for k = 1, 18, 35, 52, 69, runs each (n, m) pair with
// n in 1..8 and m in 1..3, one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8779 
// With ks set to 3: runs m=3 with n in 9..15, each against k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
8797 
// With ks set to 3: runs m=3 with n = 16 and 24, each against k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
8815 
// With cm_stride set to 11: for k = 1, 18, 35, 52, 69, runs each (n, m) pair
// with n in 1..8 and m in 1..3, one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
8836 
// Runs m=3, n=8 with ks set to 3 and a_offset set to 251 for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(4)
      .m(3).n(8).k(kc)
      .ks(3)
      .a_offset(251)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
8853 
// Same configuration as the a_offset case (ks=3, a_offset=251), additionally
// passing each zero_index in 0..2 for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(4)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(251)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
8873 
// Single run with m=3, n=8, k=16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(2).sr(4)
    .m(3).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
8887 
// Single run with m=3, n=8, k=16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(2).sr(4)
    .m(3).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
8901 
// Single run with m=3, n=8, k=16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(2).sr(4)
    .m(3).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
8915 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8916 
8917 
8918 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 4x8c2s4 NEON MLAL kernel with m=4, n=8, k=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(4)
    .m(4).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
8931 
// Single run with m=4, n=8, k=16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(4)
    .m(4).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
8945 
// With k=16, runs each (n, m) pair with n in 1..8 and m in 1..4,
// one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(4)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
8963 
// With n=8 and k=16, runs every m in 1..4, one tester iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(4)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
8979 
// With m=4 and k=16, runs every n in 1..8, one tester iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(4)
      .m(4).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
8995 
// Runs the 4x8c2s4 NEON MLAL kernel with m=4, n=8 for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(4)
      .m(4).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
9010 
// For every k in 1..15, runs each (n, m) pair with n in 1..8 and m in 1..4,
// one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9030 
// Runs the 4x8c2s4 NEON MLAL kernel with m=4, n=8 for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(4)
      .m(4).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
9045 
// For every k in 17..31, runs each (n, m) pair with n in 1..8 and m in 1..4,
// one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9065 
// Runs m=4, n=8 for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(4)
      .m(4).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
9080 
// For k = 32, 48, ..., 160, runs each (n, m) pair with n in 1..8 and m in 1..4,
// one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9100 
// Runs m=4 with n in 9..15, each against k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
9117 
// Runs m=4 with n in 9..15 and k = 1, 18, 35, 52, 69, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
9135 
// For n in 9..15 and k = 1, 18, 35, 52, 69, runs every m in 1..4,
// one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9155 
// Runs m=4 with n = 16 and 24, each against k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
9172 
// Runs m=4 with n = 16 and 24 and k = 1, 18, 35, 52, 69, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
9190 
// For n = 16 and 24 and k = 1, 18, 35, 52, 69, runs every m in 1..4,
// one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9210 
// Runs m=4, n=8 with ks set to 3 for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(4)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
9226 
// With ks set to 3: for k = 1, 18, 35, 52, 69, runs each (n, m) pair with
// n in 1..8 and m in 1..4, one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9247 
// With ks set to 3: runs m=4 with n in 9..15, each against k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
9265 
// With ks set to 3: runs m=4 with n = 16 and 24, each against k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
9283 
// With cm_stride set to 11: for k = 1, 18, 35, 52, 69, runs each (n, m) pair
// with n in 1..8 and m in 1..4, one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9304 
// Runs m=4, n=8 with ks set to 3 and a_offset set to 331 for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(4)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
9321 
// Same configuration as the a_offset case (ks=3, a_offset=331), additionally
// passing each zero_index in 0..3 for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(4)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(331)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
9341 
// Single run with m=4, n=8, k=16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(4)
    .m(4).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
9355 
// Single run with m=4, n=8, k=16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(4)
    .m(4).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
9369 
// Single run with m=4, n=8, k=16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(4)
    .m(4).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
9383 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
9384 
9385 
9386 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 2x16c2s4 NEON MLAL kernel with m=2, n=16, k=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(2).sr(4)
    .m(2).n(16).k(16)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
9399 
// Single run with m=2, n=16, k=16 and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(2).sr(4)
    .m(2).n(16).k(16)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
9413 
// With k=16, runs each (n, m) pair with n in 1..16 and m in 1..2,
// one tester iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(4)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
9431 
// With n=16 and k=16, runs every m in 1..2, one tester iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(4)
      .m(mc).n(16).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
9447 
// With m=2 and k=16, runs every n in 1..16, one tester iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(4)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
9463 
// Full 2x16 tile for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(4)
        .m(2).n(16).k(depth)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
9478 
// Sweeps k over [1, 15], n over [1, 16] and m over [1, 2], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9498 
// Full 2x16 tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(4)
        .m(2).n(16).k(depth)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
9513 
// Sweeps k over [17, 31], n over [1, 16] and m over [1, 2], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9533 
// Full 2x16 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(4)
        .m(2).n(16).k(depth)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
9548 
// Sweeps k = 32, 48, ..., 160 with n over [1, 16] and m over [1, 2]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9568 
// Sweeps n over [17, 31] with m = 2, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
9585 
// Sweeps n over [17, 31] with m = 2, for k = 1, 18, 35, 52, 69, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
9603 
// Sweeps n over [17, 31], k = 1, 18, 35, 52, 69 and m over [1, 2]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9623 
// Runs n = 32 and n = 48 with m = 2, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
9640 
// Runs n = 32 and n = 48 with m = 2, for k = 1, 18, 35, 52, 69, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
9658 
// Runs n = 32 and n = 48, k = 1, 18, 35, 52, 69 and m over [1, 2]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9678 
// Full 2x16 tile with ks set to 3, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(4)
        .m(2).n(16).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
9694 
// With ks set to 3: sweeps k = 1, 18, 35, 52, 69, n over [1, 16] and m over [1, 2];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9715 
// With ks set to 3: sweeps n over [17, 31] with m = 2, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
9733 
// With ks set to 3: runs n = 32 and n = 48 with m = 2, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(4)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
9751 
// With cm_stride set to 19: sweeps k = 1, 18, 35, 52, 69, n over [1, 16] and
// m over [1, 2]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9772 
// Full 2x16 tile with ks set to 3 and a_offset set to 163, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(4)
        .m(2).n(16).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
9789 
// Full 2x16 tile with ks = 3 and a_offset = 163, for k = 1, 18, 35, 52, 69,
// trying zero_index values 0 and 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(4)
          .m(2).n(16).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
9809 
// Full 2x16 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(4)
      .m(2).n(16).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
9823 
// Full 2x16 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(4)
      .m(2).n(16).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
9837 
// Full 2x16 tile at k = 16 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(4)
      .m(2).n(16).k(16)
      .cm_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
9851 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
9852 
9853 
9854 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full tile (m == mr == 3, n == nr == 16) at k = 16 with all other settings left at defaults.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(4)
      .m(3).n(16).k(16)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
9867 
// Full 3x16 tile at k = 16 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(4)
      .m(3).n(16).k(16)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
9881 
// Sweeps n over [1, 16] and m over [1, 3] at k = 16, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(4)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
9899 
// Sweeps m over [1, 3] with n = 16 and k = 16, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(4)
        .m(rows).n(16).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
9915 
// Sweeps n over [1, 16] with m = 3 and k = 16, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(4)
        .m(3).n(cols).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
9931 
// Full 3x16 tile for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(4)
        .m(3).n(16).k(depth)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
9946 
// Sweeps k over [1, 15], n over [1, 16] and m over [1, 3], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
9966 
// Full 3x16 tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(4)
        .m(3).n(16).k(depth)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
9981 
// Sweeps k over [17, 31], n over [1, 16] and m over [1, 3], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10001 
// Full 3x16 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(4)
        .m(3).n(16).k(depth)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
10016 
// Sweeps k = 32, 48, ..., 160 with n over [1, 16] and m over [1, 3]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10036 
// Sweeps n over [17, 31] with m = 3, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
10053 
// Sweeps n over [17, 31] with m = 3, for k = 1, 18, 35, 52, 69, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
10071 
// Sweeps n over [17, 31], k = 1, 18, 35, 52, 69 and m over [1, 3]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10091 
// Runs n = 32 and n = 48 with m = 3, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
10108 
// Runs n = 32 and n = 48 with m = 3, for k = 1, 18, 35, 52, 69, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
10126 
// Runs n = 32 and n = 48, k = 1, 18, 35, 52, 69 and m over [1, 3]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10146 
// Full 3x16 tile with ks set to 3, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(4)
        .m(3).n(16).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
10162 
// With ks set to 3: sweeps k = 1, 18, 35, 52, 69, n over [1, 16] and m over [1, 3];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10183 
// With ks set to 3: sweeps n over [17, 31] with m = 3, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
10201 
// With ks set to 3: runs n = 32 and n = 48 with m = 3, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
10219 
// With cm_stride set to 19: sweeps k = 1, 18, 35, 52, 69, n over [1, 16] and
// m over [1, 3]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10240 
// Full 3x16 tile with ks set to 3 and a_offset set to 251, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(4)
        .m(3).n(16).k(depth)
        .ks(3)
        .a_offset(251)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
10257 
// Full 3x16 tile with ks = 3 and a_offset = 251, for k = 1, 18, 35, 52, 69,
// trying zero_index values 0, 1 and 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(4)
          .m(3).n(16).k(depth)
          .ks(3)
          .a_offset(251)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
10277 
// Full 3x16 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(4)
      .m(3).n(16).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
10291 
// Full 3x16 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(4)
      .m(3).n(16).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
10305 
// Full 3x16 tile at k = 16 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(4)
      .m(3).n(16).k(16)
      .cm_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
10319 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
10320 
10321 
10322 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // One run with m=1, n=8, k=8.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
10335 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // One run with m=1, n=8, k=8 and cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
10349 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k is fixed at 8; n runs 1..8 (outer) and m runs 1..1 (inner), iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
10367 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // n=8 and k=8 are fixed; m runs 1..1, iterations set to 1.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_size).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10383 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // m=1 and k=8 are fixed; n runs 1..8, iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10399 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=1 and n=8 are fixed; k runs 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10414 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k runs 1..7, n runs 1..8, m runs 1..1 (outer to inner); iterations set to 1.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10434 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=1 and n=8 are fixed; k runs 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10449 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k runs 9..15, n runs 1..8, m runs 1..1 (outer to inner); iterations set to 1.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10469 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=1 and n=8 are fixed; k runs 16, 24, ..., 80.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10484 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k runs 16, 24, ..., 80; n runs 1..8; m runs 1..1 (outer to inner); iterations set to 1.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10504 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=1 is fixed; n runs 9..15 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
10521 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m=1 and cn_stride=11 are fixed; n runs 9..15 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
10539 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n runs 9..15, k takes 1, 10, 19, 28, 37, m runs 1..1 (outer to inner); iterations set to 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10559 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=1 is fixed; n takes 16 and 24 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
10576 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m=1 and cn_stride=11 are fixed; n takes 16 and 24 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
10594 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n takes 16 and 24, k takes 1, 10, 19, 28, 37, m runs 1..1 (outer to inner); iterations set to 1.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10614 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=8 and ks=3 are fixed; k takes 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10630 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks=3 is fixed; k takes 1, 10, 19, 28, 37, n runs 1..8, m runs 1..1 (outer to inner);
  // iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10651 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=1 and ks=3 are fixed; n runs 9..15 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
10669 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=1 and ks=3 are fixed; n takes 16 and 24 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
10687 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride=11 is fixed; k takes 1, 10, 19, 28, 37, n runs 1..8, m runs 1..1 (outer to inner);
  // iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10708 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // m=1, n=8, ks=3 and a_offset=43 are fixed; k takes 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10725 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  // k takes 1, 10, 19, 28, 37; zero_index takes only 0.
  // Every run uses m=1, n=8, ks=3 and a_offset=43.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
10745 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // One run with m=1, n=8, k=8 and qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
10759 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // One run with m=1, n=8, k=8 and qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
10773 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // One run with m=1, n=8, k=8 and cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
10787 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
10788 
10789 
10790 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // One run with m=3, n=8, k=8.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
10803 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // One run with m=3, n=8, k=8 and cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
10817 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k is fixed at 8; n runs 1..8 (outer) and m runs 1..3 (inner), iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
10835 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // n=8 and k=8 are fixed; m runs 1..3, iterations set to 1.
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(m_size).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10851 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // m=3 and k=8 are fixed; n runs 1..8, iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10867 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=3 and n=8 are fixed; k runs 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10882 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k runs 1..7, n runs 1..8, m runs 1..3 (outer to inner); iterations set to 1.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10902 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=3 and n=8 are fixed; k runs 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10917 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k runs 9..15, n runs 1..8, m runs 1..3 (outer to inner); iterations set to 1.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10937 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=3 and n=8 are fixed; k runs 16, 24, ..., 80.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
10952 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k runs 16, 24, ..., 80; n runs 1..8; m runs 1..3 (outer to inner); iterations set to 1.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
10972 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=3 is fixed; n runs 9..15 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
10989 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m=3 and cn_stride=11 are fixed; n runs 9..15 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
11007 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n runs 9..15, k takes 1, 10, 19, 28, 37, m runs 1..3 (outer to inner); iterations set to 1.
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11027 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // m=3 is fixed; n takes 16 and 24 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
11044 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m=3 and cn_stride=11 are fixed; n takes 16 and 24 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
11062 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n takes 16 and 24, k takes 1, 10, 19, 28, 37, m runs 1..3 (outer to inner); iterations set to 1.
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11082 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=3, n=8 and ks=3 are fixed; k takes 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
11098 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks=3 is fixed; k takes 1, 10, 19, 28, 37, n runs 1..8, m runs 1..3 (outer to inner);
  // iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11119 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=3 and ks=3 are fixed; n runs 9..15 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
11137 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=3 and ks=3 are fixed; n takes 16 and 24 (outer) and k takes 1, 10, 19, 28, 37 (inner).
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
11155 
// strided_cm_subtile: with cm_stride(11) and iterations(1), sweeps k over
// {1, 10, 19, 28, 37} (outermost), n over 1..8 and m over 1..3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11176 
// a_offset: full 3x8 tile with ks(3) and a_offset(127), for k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11193 
// zero: full 3x8 tile with ks(3) and a_offset(127); for each k in
// {1, 10, 19, 28, 37} passes every zero_index value 0..2 to the tester.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t z_idx = 0; z_idx < 3; ++z_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(8).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(z_idx)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11213 
// qmin: single full-tile run (m=3, n=8, k=8) with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
11227 
// qmax: single full-tile run (m=3, n=8, k=8) with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
11241 
// strided_cm: single full-tile run (m=3, n=8, k=8) with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
11255 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
11256 
11257 
11258 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: single full-tile run (m=1, n=16) of the 1x16c4 NEON MULL-DUP
// QS8 IGEMM microkernel with k=8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
11271 
// strided_cn: single full-tile run (m=1, n=16, k=8) with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
11285 
// k_eq_8_subtile: k fixed at 8 with iterations(1); sweeps n over 1..16
// (outermost) and m over 1..1 (a single value, since mr is 1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11303 
// k_eq_8_subtile_m: n=16, k=8, iterations(1); loops m over 1..1 (a single
// value, since mr is 1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11319 
// k_eq_8_subtile_n: m=1, k=8, iterations(1); sweeps n over 1..16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11335 
// k_lt_8: full tile (m=1, n=16) for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k_size)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11350 
// k_lt_8_subtile: with iterations(1), sweeps k over 1..7 (outermost),
// n over 1..16 and m over 1..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11370 
// k_gt_8: full tile (m=1, n=16) for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k_size)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11385 
// k_gt_8_subtile: with iterations(1), sweeps k over 9..15 (outermost),
// n over 1..16 and m over 1..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11405 
// k_div_8: full tile (m=1, n=16) for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k_size)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11420 
// k_div_8_subtile: with iterations(1), sweeps k = 16, 24, ..., 80
// (outermost), n over 1..16 and m over 1..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11440 
// n_gt_16: m fixed at 1; sweeps n over 17..31 (outermost) and k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11457 
// n_gt_16_strided_cn: m fixed at 1 with cn_stride(19); sweeps n over 17..31
// (outermost) and k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11475 
// n_gt_16_subtile: with iterations(1), sweeps n over 17..31 (outermost),
// k over {1, 10, 19, 28, 37} and m over 1..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11495 
// n_div_16: m fixed at 1; runs n in {32, 48} (outermost) against k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11512 
// n_div_16_strided_cn: m fixed at 1 with cn_stride(19); runs n in {32, 48}
// (outermost) against k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11530 
// n_div_16_subtile: with iterations(1), runs n in {32, 48} (outermost),
// k in {1, 10, 19, 28, 37} and m over 1..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11550 
// small_kernel: full tile (m=1, n=16) with ks(3), for k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k_size)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11566 
// small_kernel_subtile: with ks(3) and iterations(1), sweeps k over
// {1, 10, 19, 28, 37} (outermost), n over 1..16 and m over 1..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11587 
// n_gt_16_small_kernel: with ks(3) and m fixed at 1, sweeps n over 17..31
// (outermost) and k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11605 
// n_div_16_small_kernel: with ks(3) and m fixed at 1, runs n in {32, 48}
// (outermost) against k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11623 
// strided_cm_subtile: with cm_stride(19) and iterations(1), sweeps k over
// {1, 10, 19, 28, 37} (outermost), n over 1..16 and m over 1..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11644 
// a_offset: full tile (m=1, n=16) with ks(3) and a_offset(43), for k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11661 
// zero: full tile (m=1, n=16) with ks(3) and a_offset(43); for each k in
// {1, 10, 19, 28, 37} passes zero_index 0 (the only value below mr = 1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t z_idx = 0; z_idx < 1; ++z_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(z_idx)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11681 
// qmin: single full-tile run (m=1, n=16, k=8) with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
11695 
// qmax: single full-tile run (m=1, n=16, k=8) with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
11709 
// strided_cm: single full-tile run (m=1, n=16, k=8) with cm_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
11723 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
11724 
11725 
11726 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: single full-tile run (m=4, n=8) of the 4x8c4 NEON MLAL-DUP
// QS8 IGEMM microkernel with k=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
11739 
// strided_cn: single full-tile run (m=4, n=8, k=16) with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
11753 
// k_eq_16_subtile: k fixed at 16 with iterations(1); sweeps n over 1..8
// (outermost) and m over 1..4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11771 
// k_eq_16_subtile_m: n=8, k=16, iterations(1); sweeps m over 1..4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11787 
// k_eq_16_subtile_n: m=4, k=16, iterations(1); sweeps n over 1..8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(n_size).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11803 
// k_lt_16: full tile (m=4, n=8) for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_size)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11818 
// k_lt_16_subtile: with iterations(1), sweeps k over 1..15 (outermost),
// n over 1..8 and m over 1..4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11838 
// k_gt_16: full tile (m=4, n=8) for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_size)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11853 
// k_gt_16_subtile: with iterations(1), sweeps k over 17..31 (outermost),
// n over 1..8 and m over 1..4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11873 
// k_div_16: full tile (m=4, n=8) for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_size)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
11888 
// k_div_16_subtile: with iterations(1), sweeps k = 32, 48, ..., 160
// (outermost), n over 1..8 and m over 1..4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11908 
// n_gt_8: m fixed at 4; sweeps n over 9..15 (outermost) and k over
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11925 
// n_gt_8_strided_cn: m fixed at 4 with cn_stride(11); sweeps n over 9..15
// (outermost) and k over {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11943 
// n_gt_8_subtile: with iterations(1), sweeps n over 9..15 (outermost),
// k over {1, 18, 35, 52, 69} and m over 1..4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11963 
// n_div_8: m fixed at 4; runs n in {16, 24} (outermost) against k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11980 
// n_div_8_strided_cn: m fixed at 4 with cn_stride(11); runs n in {16, 24}
// (outermost) against k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
11998 
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..4 on the
// mr=4 / nr=8 / kr=4 / sr=1 tile, with iterations(1) for each combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12018 
// Sweeps k over {1, 18, 35, 52, 69} with m = 4, n = 8 and ks(3) on the
// mr=4 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12034 
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..4 with ks(3)
// and iterations(1) on the mr=4 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12055 
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 4 and ks(3) on
// the mr=4 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12073 
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 4 and ks(3)
// on the mr=4 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12091 
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..4 with
// cm_stride(11) and iterations(1) on the mr=4 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12112 
// Sweeps k over {1, 18, 35, 52, 69} with m = 4, n = 8, ks(3) and
// a_offset(331) on the mr=4 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .a_offset(331);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12129 
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over 0..3 with m = 4,
// n = 8, ks(3) and a_offset(331) on the mr=4 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .a_offset(331)
        .zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12149 
// Single configuration: m = 4, n = 8, k = 16 with qmin(128) on the
// mr=4 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
12163 
// Single configuration: m = 4, n = 8, k = 16 with qmax(128) on the
// mr=4 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
12177 
// Single configuration: m = 4, n = 8, k = 16 with cm_stride(11) on the
// mr=4 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
12191 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
12192 
12193 
12194 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: m = 2, n = 16, k = 16 on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(16)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
12207 
// Single configuration: m = 2, n = 16, k = 16 with cn_stride(19) on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(16)
    .cn_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
12221 
// Sweeps n over 1..16 and m over 1..2 with k fixed at 16 and iterations(1)
// on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12239 
// Sweeps m over 1..2 with n = 16, k = 16 and iterations(1) on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(m_size).n(16).k(16)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12255 
// Sweeps n over 1..16 with m = 2, k = 16 and iterations(1) on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(n_size).k(16)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12271 
// Sweeps k over 1..15 with m = 2 and n = 16 on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12286 
// Sweeps k over 1..15, n over 1..16 and m over 1..2 with iterations(1) on
// the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12306 
// Sweeps k over 17..31 with m = 2 and n = 16 on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12321 
// Sweeps k over 17..31, n over 1..16 and m over 1..2 with iterations(1) on
// the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12341 
// Sweeps k over 32, 48, ..., 160 (step 16) with m = 2 and n = 16 on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12356 
// Sweeps k over 32, 48, ..., 160 (step 16), n over 1..16 and m over 1..2
// with iterations(1) on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12376 
// Sweeps n over 17..31 and k over {1, 18, 35, 52, 69} with m = 2 on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12393 
// Sweeps n over 17..31 and k over {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(19) on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(19);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12411 
// Sweeps n over 17..31, k over {1, 18, 35, 52, 69} and m over 1..2 with
// iterations(1) on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12431 
// Sweeps n over {32, 48} and k over {1, 18, 35, 52, 69} with m = 2 on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12448 
// Sweeps n over {32, 48} and k over {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(19) on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(19);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12466 
// Sweeps n over {32, 48}, k over {1, 18, 35, 52, 69} and m over 1..2 with
// iterations(1) on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12486 
// Sweeps k over {1, 18, 35, 52, 69} with m = 2, n = 16 and ks(3) on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size)
      .ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12502 
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..16 and m over 1..2 with ks(3)
// and iterations(1) on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12523 
// Sweeps n over 17..31 and k over {1, 18, 35, 52, 69} with m = 2 and ks(3)
// on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12541 
// Sweeps n over {32, 48} and k over {1, 18, 35, 52, 69} with m = 2 and ks(3)
// on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12559 
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..16 and m over 1..2 with
// cm_stride(19) and iterations(1) on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(19)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12580 
// Sweeps k over {1, 18, 35, 52, 69} with m = 2, n = 16, ks(3) and
// a_offset(163) on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(16).kr(4).sr(1)
      .m(2).n(16).k(k_size)
      .ks(3)
      .a_offset(163);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12597 
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over 0..1 with m = 2,
// n = 16, ks(3) and a_offset(163) on the mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(16).kr(4).sr(1)
        .m(2).n(16).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12617 
// Single configuration: m = 2, n = 16, k = 16 with qmin(128) on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
12631 
// Single configuration: m = 2, n = 16, k = 16 with qmax(128) on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
12645 
// Single configuration: m = 2, n = 16, k = 16 with cm_stride(19) on the
// mr=2 / nr=16 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(4).sr(1)
    .m(2).n(16).k(16)
    .cm_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
12659 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
12660 
12661 
12662 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: m = 2, n = 8, k = 8 on the
// mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
12675 
// Single configuration: m = 2, n = 8, k = 8 with cn_stride(11) on the
// mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
12689 
// Sweeps n over 1..8 and m over 1..2 with k fixed at 8 and iterations(1) on
// the mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
12707 
// Sweeps m over 1..2 with n = 8, k = 8 and iterations(1) on the
// mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(m_size).n(8).k(8)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12723 
// Sweeps n over 1..8 with m = 2, k = 8 and iterations(1) on the
// mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12739 
// Sweeps k over 1..7 with m = 2 and n = 8 on the
// mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12754 
// Sweeps k over 1..7, n over 1..8 and m over 1..2 with iterations(1) on the
// mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12774 
// Sweeps k over 9..15 with m = 2 and n = 8 on the
// mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12789 
// Sweeps k over 9..15, n over 1..8 and m over 1..2 with iterations(1) on the
// mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12809 
// Sweeps k over 16, 24, ..., 80 (step 8) with m = 2 and n = 8 on the
// mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k_size);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
12824 
// Sweeps k over 16, 24, ..., 80 (step 8), n over 1..8 and m over 1..2 with
// iterations(1) on the mr=2 / nr=8 / kr=4 / sr=1 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12844 
// 2x8c4 NEON MULL LD1R: n in [9, 15] with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
12861 
// 2x8c4 NEON MULL LD1R: n in [9, 15] with m = 2 and cn_stride(11), for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
12879 
// 2x8c4 NEON MULL LD1R: n in [9, 15], k = 1, 10, 19, 28, 37, and m in [1, 2];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12899 
// 2x8c4 NEON MULL LD1R: n = 16 and 24 with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
12916 
// 2x8c4 NEON MULL LD1R: n = 16 and 24 with m = 2 and cn_stride(11), for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
12934 
// 2x8c4 NEON MULL LD1R: n = 16 and 24, k = 1, 10, 19, 28, 37, and m in [1, 2];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12954 
// 2x8c4 NEON MULL LD1R: full 2x8 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
12970 
// 2x8c4 NEON MULL LD1R: ks(3) with k = 1, 10, 19, 28, 37 crossed with each
// n in [1, 8] and m in [1, 2]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
12991 
// 2x8c4 NEON MULL LD1R: n in [9, 15] with m = 2 and ks(3), for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13009 
// 2x8c4 NEON MULL LD1R: n = 16 and 24 with m = 2 and ks(3), for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13027 
// 2x8c4 NEON MULL LD1R: cm_stride(11) with k = 1, 10, 19, 28, 37 crossed with
// each n in [1, 8] and m in [1, 2]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13048 
// 2x8c4 NEON MULL LD1R: full 2x8 tile with ks(3) and a_offset(83), for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
13065 
// 2x8c4 NEON MULL LD1R: full 2x8 tile with ks(3) and a_offset(83); for each
// k = 1, 10, 19, 28, 37 the zero_index is set to 0 and then 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(8).k(depth)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13085 
// 2x8c4 NEON MULL LD1R: single full 2x8 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
13099 
// 2x8c4 NEON MULL LD1R: single full 2x8 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
13113 
// 2x8c4 NEON MULL LD1R: single full 2x8 tile at k = 8 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
13127 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
13128 
13129 
13130 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x8c4 NEON MLAL LD1R: single full 2x8 tile at k = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
13143 
// 2x8c4 NEON MLAL LD1R: single full 2x8 tile at k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
13157 
// 2x8c4 NEON MLAL LD1R: k = 16 with each n in [1, 8] and m in [1, 2];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols < 9; ++cols) {
    for (uint32_t rows = 1; rows < 3; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13175 
// 2x8c4 NEON MLAL LD1R: k = 16 and n = 8 with m in [1, 2]; iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows < 3; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(rows).n(8).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
13191 
// 2x8c4 NEON MLAL LD1R: k = 16 and m = 2 with n in [1, 8]; iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols < 9; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cols).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
13207 
// 2x8c4 NEON MLAL LD1R: full 2x8 tile for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 15; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
13222 
// 2x8c4 NEON MLAL LD1R: every k in [1, 15] crossed with each n in [1, 8] and
// m in [1, 2]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13242 
// 2x8c4 NEON MLAL LD1R: full 2x8 tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth <= 31; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
13257 
// 2x8c4 NEON MLAL LD1R: every k in [17, 31] crossed with each n in [1, 8] and
// m in [1, 2]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth <= 31; ++depth) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13277 
// 2x8c4 NEON MLAL LD1R: full 2x8 tile with k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
13292 
// 2x8c4 NEON MLAL LD1R: k = 32, 48, ..., 160 crossed with each n in [1, 8] and
// m in [1, 2]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13312 
// 2x8c4 NEON MLAL LD1R: n in [9, 15] with m = 2, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13329 
// 2x8c4 NEON MLAL LD1R: n in [9, 15] with m = 2 and cn_stride(11), for
// k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13347 
// 2x8c4 NEON MLAL LD1R: n in [9, 15], k = 1, 18, 35, 52, 69, and m in [1, 2];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13367 
// 2x8c4 NEON MLAL LD1R: n = 16 and 24 with m = 2, for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13384 
// 2x8c4 NEON MLAL LD1R: n = 16 and 24 with m = 2 and cn_stride(11), for
// k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13402 
// 2x8c4 NEON MLAL LD1R: n = 16 and 24, k = 1, 18, 35, 52, 69, and m in [1, 2];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13422 
// 2x8c4 NEON MLAL LD1R: full 2x8 tile with ks(3), for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
13438 
// 2x8c4 NEON MLAL LD1R: ks(3) with k = 1, 18, 35, 52, 69 crossed with each
// n in [1, 8] and m in [1, 2]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13459 
// 2x8c4 NEON MLAL LD1R: n in [9, 15] with m = 2 and ks(3), for
// k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13477 
// 2x8c4 NEON MLAL LD1R: n = 16 and 24 with m = 2 and ks(3), for
// k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13495 
// 2x8c4 NEON MLAL LD1R: cm_stride(11) with k = 1, 18, 35, 52, 69 crossed with
// each n in [1, 8] and m in [1, 2]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols < 9; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13516 
// 2x8c4 NEON MLAL LD1R: full 2x8 tile with ks(3) and a_offset(163), for
// k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
13533 
// 2x8c4 NEON MLAL LD1R: full 2x8 tile with ks(3) and a_offset(163); for each
// k = 1, 18, 35, 52, 69 the zero_index is set to 0 and then 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(8).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13553 
// 2x8c4 NEON MLAL LD1R: single full 2x8 tile at k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
13567 
// 2x8c4 NEON MLAL LD1R: single full 2x8 tile at k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
13581 
// 2x8c4 NEON MLAL LD1R: single full 2x8 tile at k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
13595 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
13596 
13597 
13598 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8c4 NEON MULL LD2R: single full 1x8 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
13611 
// 1x8c4 NEON MULL LD2R: single full 1x8 tile at k = 8 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
13625 
// 1x8c4 NEON MULL LD2R: k = 8 with each n in [1, 8]; the m loop covers only
// m = 1. iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols < 9; ++cols) {
    for (uint32_t rows = 1; rows < 2; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
13643 
// 1x8c4 NEON MULL LD2R: k = 8 and n = 8; the m loop covers only m = 1.
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows < 2; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(rows).n(8).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
13659 
// 1x8c4 NEON MULL LD2R: k = 8 and m = 1 with n in [1, 8]; iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols < 9; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
13675 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1, n=8 for every k in [1, 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
13690 
// Runs the 1x8c4 NEON MULL LD2R kernel for every k in [1, 8), n in [1, 8]
// and m in [1, 1], with iterations set to 1 for each shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13710 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1, n=8 for every k in [9, 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
13725 
// Runs the 1x8c4 NEON MULL LD2R kernel for every k in [9, 16), n in [1, 8]
// and m in [1, 1], with iterations set to 1 for each shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13745 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1, n=8 for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
13760 
// Runs the 1x8c4 NEON MULL LD2R kernel for k = 16, 24, ..., 80 combined with
// every n in [1, 8] and m in [1, 1]; iterations is set to 1 for each shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13780 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1 for every n in [9, 16) and
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
13797 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1 with cn_stride=11 for every
// n in [9, 16) and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
13815 
// Runs the 1x8c4 NEON MULL LD2R kernel for every n in [9, 16),
// k = 1, 10, 19, 28, 37 and m in [1, 1]; iterations is set to 1 per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13835 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1 for n = 16, 24 and
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
13852 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1 with cn_stride=11 for
// n = 16, 24 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
13870 
// Runs the 1x8c4 NEON MULL LD2R kernel for n = 16, 24,
// k = 1, 10, 19, 28, 37 and m in [1, 1]; iterations is set to 1 per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13890 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1, n=8 with ks=3 for
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
13906 
// Runs the 1x8c4 NEON MULL LD2R kernel with ks=3 for k = 1, 10, 19, 28, 37,
// every n in [1, 8] and m in [1, 1]; iterations is set to 1 per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13927 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1 with ks=3 for every
// n in [9, 16) and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
13945 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1 with ks=3 for n = 16, 24 and
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
13963 
// Runs the 1x8c4 NEON MULL LD2R kernel with cm_stride=11 for
// k = 1, 10, 19, 28, 37, every n in [1, 8] and m in [1, 1]; iterations is
// set to 1 per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
13984 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1, n=8 with ks=3 and
// a_offset=43 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
14001 
// Runs the 1x8c4 NEON MULL LD2R kernel at m=1, n=8 with ks=3 and
// a_offset=43 for k = 1, 10, 19, 28, 37, passing each index in [0, 1) to
// zero_index.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(8).k(depth);
      tester.ks(3).a_offset(43).zero_index(zero_row);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
14021 
// Runs the 1x8c4 NEON MULL LD2R kernel once at m=1, n=8, k=8 with qmin=128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
14035 
// Runs the 1x8c4 NEON MULL LD2R kernel once at m=1, n=8, k=8 with qmax=128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
14049 
// Runs the 1x8c4 NEON MULL LD2R kernel once at m=1, n=8, k=8 with
// cm_stride=11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
14063 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
14064 
14065 
14066 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 1x8c4 NEON MLAL LD2R kernel once at m=1, n=8, k=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
14079 
// Runs the 1x8c4 NEON MLAL LD2R kernel once at m=1, n=8, k=16 with
// cn_stride=11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
14093 
// Runs the 1x8c4 NEON MLAL LD2R kernel at k=16 for every n in [1, 8] and
// every m in [1, 1], with iterations set to 1 for each shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(rows).n(cols).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
14111 
// Runs the 1x8c4 NEON MLAL LD2R kernel at n=8, k=16 for every m in [1, 1],
// with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(rows).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
14127 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1, k=16 for every n in [1, 8],
// with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(cols).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
14143 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1, n=8 for every k in [1, 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
14158 
// Runs the 1x8c4 NEON MLAL LD2R kernel for every k in [1, 16), n in [1, 8]
// and m in [1, 1], with iterations set to 1 for each shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14178 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1, n=8 for every k in [17, 32).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
14193 
// Runs the 1x8c4 NEON MLAL LD2R kernel for every k in [17, 32), n in [1, 8]
// and m in [1, 1], with iterations set to 1 for each shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14213 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1, n=8 for
// k = 32, 48, ..., 160.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
14228 
// Runs the 1x8c4 NEON MLAL LD2R kernel for k = 32, 48, ..., 160 combined
// with every n in [1, 8] and m in [1, 1]; iterations is set to 1 per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14248 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1 for every n in [9, 16) and
// k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
14265 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1 with cn_stride=11 for every
// n in [9, 16) and k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
14283 
// Runs the 1x8c4 NEON MLAL LD2R kernel for every n in [9, 16),
// k = 1, 18, 35, 52, 69 and m in [1, 1]; iterations is set to 1 per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14303 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1 for n = 16, 24 and
// k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
14320 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1 with cn_stride=11 for
// n = 16, 24 and k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
14338 
// Runs the 1x8c4 NEON MLAL LD2R kernel for n = 16, 24,
// k = 1, 18, 35, 52, 69 and m in [1, 1]; iterations is set to 1 per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14358 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1, n=8 with ks=3 for
// k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
14374 
// Runs the 1x8c4 NEON MLAL LD2R kernel with ks=3 for k = 1, 18, 35, 52, 69,
// every n in [1, 8] and m in [1, 1]; iterations is set to 1 per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14395 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1 with ks=3 for every
// n in [9, 16) and k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
14413 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1 with ks=3 for n = 16, 24 and
// k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
14431 
// Runs the 1x8c4 NEON MLAL LD2R kernel with cm_stride=11 for
// k = 1, 18, 35, 52, 69, every n in [1, 8] and m in [1, 1]; iterations is
// set to 1 per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14452 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1, n=8 with ks=3 and
// a_offset=83 for k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
14469 
// Runs the 1x8c4 NEON MLAL LD2R kernel at m=1, n=8 with ks=3 and
// a_offset=83 for k = 1, 18, 35, 52, 69, passing each index in [0, 1) to
// zero_index.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(8).k(depth);
      tester.ks(3).a_offset(83).zero_index(zero_row);
      tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
14489 
// Runs the 1x8c4 NEON MLAL LD2R kernel once at m=1, n=8, k=16 with
// qmin=128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
14503 
// Runs the 1x8c4 NEON MLAL LD2R kernel once at m=1, n=8, k=16 with
// qmax=128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
14517 
// Single run with m = 1, n = 8, k = 16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
14531 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
14532 
14533 
14534 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 1, n = 8, k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
14547 
// Single run with m = 1, n = 8, k = 8 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
14561 
// k = 8; sweeps n over 1..8 and m over 1..1, with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
14579 
// n = 8, k = 8; sweeps m over 1..1, with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
14595 
// m = 1, k = 8; sweeps n over 1..8, with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
14611 
// m = 1, n = 8; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
14626 
// Sweeps k over 1..7, n over 1..8 and m over 1..1, with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14646 
// m = 1, n = 8; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
14661 
// Sweeps k over 9..15, n over 1..8 and m over 1..1, with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14681 
// m = 1, n = 8; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
14696 
// Sweeps k over 16, 24, ..., 80, n over 1..8 and m over 1..1, with iterations
// set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14716 
// m = 1; sweeps n over 9..15 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
14733 
// m = 1, cn_stride = 11; sweeps n over 9..15 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
14751 
// Sweeps n over 9..15, k over 1, 10, 19, 28, 37 and m over 1..1, with
// iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14771 
// m = 1; sweeps n over 16, 24 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
14788 
// m = 1, cn_stride = 11; sweeps n over 16, 24 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
14806 
// Sweeps n over 16, 24, k over 1, 10, 19, 28, 37 and m over 1..1, with
// iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14826 
// m = 1, n = 8, ks = 3; sweeps k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
14842 
// ks = 3; sweeps k over 1, 10, 19, 28, 37, n over 1..8 and m over 1..1, with
// iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3).iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14863 
// m = 1, ks = 3; sweeps n over 9..15 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
14881 
// m = 1, ks = 3; sweeps n over 16, 24 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
14899 
// cm_stride = 11; sweeps k over 1, 10, 19, 28, 37, n over 1..8 and m over 1..1,
// with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11).iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
14920 
// m = 1, n = 8, ks = 3, a_offset = 43; sweeps k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(kc)
      .ks(3).a_offset(43)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
14937 
// m = 1, n = 8, ks = 3, a_offset = 43, zero_index = 0; sweeps k over
// 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .ks(3).a_offset(43).zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
14957 
// Single run with m = 1, n = 8, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
14971 
// Single run with m = 1, n = 8, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
14985 
// Single run with m = 1, n = 8, k = 8 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
14999 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15000 
15001 
15002 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 3, n = 8, k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(2).sr(1)
    .m(3).n(8).k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
15015 
// Single run with m = 3, n = 8, k = 8 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(2).sr(1)
    .m(3).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
15029 
// k = 8; sweeps n over 1..8 and m over 1..3, with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
15047 
// n = 8, k = 8; sweeps m over 1..3, with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
15063 
// m = 3, k = 8; sweeps n over 1..8, with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
15079 
// m = 3, n = 8; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(1)
      .m(3).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
15094 
// Sweeps k over 1..7, n over 1..8 and m over 1..3, with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15114 
// m = 3, n = 8; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(1)
      .m(3).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
15129 
// Sweeps k over 9..15, n over 1..8 and m over 1..3, with iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15149 
// m = 3, n = 8; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(1)
      .m(3).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
15164 
// Sweeps k over 16, 24, ..., 80, n over 1..8 and m over 1..3, with iterations
// set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15184 
// m = 3; sweeps n over 9..15 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
15201 
// m = 3, cn_stride = 11; sweeps n over 9..15 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
15219 
// Sweeps n over 9..15, k over 1, 10, 19, 28, 37 and m over 1..3, with
// iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15239 
// m = 3; sweeps n over 16, 24 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
15256 
// m = 3, cn_stride = 11; sweeps n over 16, 24 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
15274 
// Sweeps n over 16, 24, k over 1, 10, 19, 28, 37 and m over 1..3, with
// iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15294 
// m = 3, n = 8, ks = 3; sweeps k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
15310 
// ks = 3; sweeps k over 1, 10, 19, 28, 37, n over 1..8 and m over 1..3, with
// iterations set to 1 per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3).iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15331 
// m = 3, ks = 3; sweeps n over 9..15 and k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
15349 
// n_div_8_small_kernel: n in {16, 24} (multiples of nr == 8), m == 3, ks(3);
// k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15367 
// strided_cm_subtile: every (m, n) with m in 1..3 and n in 1..8, cm_stride(11),
// one iteration each; k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15388 
// a_offset: m == 3, n == 8, ks(3), a_offset(127); k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
15405 
// zero: m == 3, n == 8, ks(3), a_offset(127), with zero_index set to each of
// 0, 1, 2; k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15425 
// qmin: m == 3, n == 8, k == 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(2).sr(1)
    .m(3).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
15439 
// qmax: m == 3, n == 8, k == 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(2).sr(1)
    .m(3).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
15453 
// strided_cm: m == 3, n == 8, k == 8 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(2).sr(1)
    .m(3).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
15467 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15468 
15469 
15470 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: m == mr (1), n == nr (16), k == 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(2).sr(1)
    .m(1).n(16).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
15483 
// strided_cn: m == 1, n == 16, k == 8 with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(2).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
15497 
// k_eq_8_subtile: k == 8 for every n in 1..16; the m loop covers only m == 1.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15515 
// k_eq_8_subtile_m: k == 8, n == 16; the m loop covers only m == 1.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
15531 
// k_eq_8_subtile_n: k == 8, m == 1, for every n in 1..16.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
15547 
// k_lt_8: m == 1, n == 16, for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
15562 
// k_lt_8_subtile: k in 1..7, n in 1..16; the m loop covers only m == 1.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15582 
// k_gt_8: m == 1, n == 16, for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
15597 
// k_gt_8_subtile: k in 9..15, n in 1..16; the m loop covers only m == 1.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15617 
// k_div_8: m == 1, n == 16, for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
15632 
// k_div_8_subtile: k = 16, 24, ..., 80, n in 1..16; the m loop covers only
// m == 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15652 
// n_gt_16: n in 17..31 (above nr == 16), m == 1; k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15669 
// n_gt_16_strided_cn: n in 17..31, m == 1, cn_stride(19);
// k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15687 
// n_gt_16_subtile: n in 17..31, k takes 1, 10, 19, 28, 37; the m loop covers
// only m == 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15707 
// n_div_16: n in {32, 48} (multiples of nr == 16), m == 1;
// k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15724 
// n_div_16_strided_cn: n in {32, 48}, m == 1, cn_stride(19);
// k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15742 
// n_div_16_subtile: n in {32, 48}, k takes 1, 10, 19, 28, 37; the m loop
// covers only m == 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15762 
// small_kernel: m == 1, n == 16, ks(3); k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
15778 
// small_kernel_subtile: ks(3), n in 1..16, k takes 1, 10, 19, 28, 37; the m
// loop covers only m == 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15799 
// n_gt_16_small_kernel: n in 17..31, m == 1, ks(3); k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15817 
// n_div_16_small_kernel: n in {32, 48}, m == 1, ks(3);
// k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15835 
// strided_cm_subtile: cm_stride(19), n in 1..16, k takes 1, 10, 19, 28, 37;
// the m loop covers only m == 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
15856 
// a_offset: m == 1, n == 16, ks(3), a_offset(43); k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(1)
      .m(1).n(16).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
15873 
// zero: m == 1, n == 16, ks(3), a_offset(43); the zero_index loop covers only
// index 0. k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(1)
        .m(1).n(16).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15893 
// qmin: m == 1, n == 16, k == 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(2).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
15907 
// qmax: m == 1, n == 16, k == 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(2).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
15921 
// strided_cm: m == 1, n == 16, k == 8 with cm_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(2).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
15935 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15936 
15937 
15938 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: m == mr (4), n == nr (8), k == 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(1)
    .m(4).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
15951 
// strided_cn: m == 4, n == 8, k == 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(1)
    .m(4).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
15965 
// k_eq_16_subtile: k == 16 for every (m, n) with m in 1..4 and n in 1..8.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
15983 
// k_eq_16_subtile_m: k == 16, n == 8, for every m in 1..4.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
15999 
// k_eq_16_subtile_n: k == 16, m == 4, for every n in 1..8.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
16015 
// k_lt_16: m == 4, n == 8, for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
16030 
// k_lt_16_subtile: k in 1..15 for every (m, n) with m in 1..4 and n in 1..8.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16050 
// k_gt_16: m == 4, n == 8, for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
16065 
// k_gt_16_subtile: k in 17..31 for every (m, n) with m in 1..4 and n in 1..8.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16085 
// k_div_16: m == 4, n == 8, for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
16100 
// k_div_16_subtile: k = 32, 48, ..., 160 for every (m, n) with m in 1..4 and
// n in 1..8. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16120 
// n_gt_8: n in 9..15 (above nr == 8), m == 4; k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
16137 
// n_gt_8_strided_cn: n in 9..15, m == 4, cn_stride(11);
// k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
16155 
// n_gt_8_subtile: n in 9..15, m in 1..4; k takes 1, 18, 35, 52, 69.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16175 
// n_div_8: n in {16, 24} (multiples of nr == 8), m == 4;
// k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
16192 
// Runs the 4x8c2 NEON MLAL-dup kernel with m = 4 for n in {16, 24} and
// k in {1, 18, 35, 52, 69}, each time with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16210 
// Runs the 4x8c2 NEON MLAL-dup kernel for n in {16, 24},
// k in {1, 18, 35, 52, 69} and m in 1..4, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16230 
// Runs the 4x8c2 NEON MLAL-dup kernel on a 4x8 tile with ks(3) for
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16246 
// Runs the 4x8c2 NEON MLAL-dup kernel with ks(3) for k in {1, 18, 35, 52, 69},
// n in 1..8 and m in 1..4, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16267 
// Runs the 4x8c2 NEON MLAL-dup kernel with m = 4 and ks(3) for n in 9..15 and
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16285 
// Runs the 4x8c2 NEON MLAL-dup kernel with m = 4 and ks(3) for n in {16, 24}
// and k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16303 
// Runs the 4x8c2 NEON MLAL-dup kernel with cm_stride(11) for
// k in {1, 18, 35, 52, 69}, n in 1..8 and m in 1..4, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16324 
// Runs the 4x8c2 NEON MLAL-dup kernel on a 4x8 tile with ks(3) and
// a_offset(331) for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .a_offset(331)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16341 
// Runs the 4x8c2 NEON MLAL-dup kernel on a 4x8 tile with ks(3) and
// a_offset(331) for k in {1, 18, 35, 52, 69} and zero_index in 0..3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(4).n(8).k(k_size)
          .ks(3)
          .a_offset(331)
          .zero_index(zero_row)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16361 
// Runs the 4x8c2 NEON MLAL-dup kernel once on a 4x8 tile with k = 16 and
// qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
16375 
// Runs the 4x8c2 NEON MLAL-dup kernel once on a 4x8 tile with k = 16 and
// qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
16389 
// Runs the 4x8c2 NEON MLAL-dup kernel once on a 4x8 tile with k = 16 and
// cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
16403 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
16404 
16405 
16406 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 2x16c2 NEON MLAL-dup kernel once on a 2x16 tile with k = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(16)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
16419 
// Runs the 2x16c2 NEON MLAL-dup kernel once on a 2x16 tile with k = 16 and
// cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(16)
      .cn_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
16433 
// Runs the 2x16c2 NEON MLAL-dup kernel with k = 16 for n in 1..16 and
// m in 1..2, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(m_size).n(n_size).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16451 
// Runs the 2x16c2 NEON MLAL-dup kernel with n = 16 and k = 16 for m in 1..2,
// with iterations(1) per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(m_size).n(16).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16467 
// Runs the 2x16c2 NEON MLAL-dup kernel with m = 2 and k = 16 for n in 1..16,
// with iterations(1) per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(n_size).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16483 
// Runs the 2x16c2 NEON MLAL-dup kernel on a 2x16 tile for k in 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16498 
// Runs the 2x16c2 NEON MLAL-dup kernel for k in 1..15, n in 1..16 and
// m in 1..2, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16518 
// Runs the 2x16c2 NEON MLAL-dup kernel on a 2x16 tile for k in 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16533 
// Runs the 2x16c2 NEON MLAL-dup kernel for k in 17..31, n in 1..16 and
// m in 1..2, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16553 
// Runs the 2x16c2 NEON MLAL-dup kernel on a 2x16 tile for k = 32, 48, ..., 160.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16568 
// Runs the 2x16c2 NEON MLAL-dup kernel for k = 32, 48, ..., 160, n in 1..16
// and m in 1..2, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16588 
// Runs the 2x16c2 NEON MLAL-dup kernel with m = 2 for n in 17..31 and
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16605 
// Runs the 2x16c2 NEON MLAL-dup kernel with m = 2 for n in 17..31 and
// k in {1, 18, 35, 52, 69}, each time with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16623 
// Runs the 2x16c2 NEON MLAL-dup kernel for n in 17..31,
// k in {1, 18, 35, 52, 69} and m in 1..2, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16643 
// Runs the 2x16c2 NEON MLAL-dup kernel with m = 2 for n in {32, 48} and
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16660 
// Runs the 2x16c2 NEON MLAL-dup kernel with m = 2 for n in {32, 48} and
// k in {1, 18, 35, 52, 69}, each time with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16678 
// Runs the 2x16c2 NEON MLAL-dup kernel for n in {32, 48},
// k in {1, 18, 35, 52, 69} and m in 1..2, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16698 
// Runs the 2x16c2 NEON MLAL-dup kernel on a 2x16 tile with ks(3) for
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(16).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16714 
// Runs the 2x16c2 NEON MLAL-dup kernel with ks(3) for k in {1, 18, 35, 52, 69},
// n in 1..16 and m in 1..2, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16735 
// Runs the 2x16c2 NEON MLAL-dup kernel with m = 2 and ks(3) for n in 17..31
// and k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16753 
// Runs the 2x16c2 NEON MLAL-dup kernel with m = 2 and ks(3) for n in {32, 48}
// and k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16771 
// Runs the 2x16c2 NEON MLAL-dup kernel with cm_stride(19) for
// k in {1, 18, 35, 52, 69}, n in 1..16 and m in 1..2, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16792 
// Runs the 2x16c2 NEON MLAL-dup kernel on a 2x16 tile with ks(3) and
// a_offset(163) for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(16).k(k_size)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16809 
// Runs the 2x16c2 NEON MLAL-dup kernel on a 2x16 tile with ks(3) and
// a_offset(163) for k in {1, 18, 35, 52, 69} and zero_index in 0..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(2).n(16).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_row)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16829 
// Runs the 2x16c2 NEON MLAL-dup kernel once on a 2x16 tile with k = 16 and
// qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
16843 
// Runs the 2x16c2 NEON MLAL-dup kernel once on a 2x16 tile with k = 16 and
// qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
16857 
// Runs the 2x16c2 NEON MLAL-dup kernel once on a 2x16 tile with k = 16 and
// cm_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(16)
      .cm_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
16871 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
16872 
16873 
16874 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the 1x8c8 AArch64 NEON MLAL Cortex-A53 kernel once on a 1x8 tile with
// k = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
16887 
// Runs the 1x8c8 AArch64 NEON MLAL Cortex-A53 kernel once on a 1x8 tile with
// k = 16 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
16901 
// Runs the 1x8c8 AArch64 NEON MLAL Cortex-A53 kernel with k = 16 for
// n in 1..8 and m in 1..1, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
16919 
// Runs the 1x8c8 AArch64 NEON MLAL Cortex-A53 kernel with n = 8 and k = 16
// for m in 1..1, with iterations(1) per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(m_size).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16935 
// Runs the 1x8c8 AArch64 NEON MLAL Cortex-A53 kernel with m = 1 and k = 16
// for n in 1..8, with iterations(1) per run.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16951 
// Runs the 1x8c8 AArch64 NEON MLAL Cortex-A53 kernel on a 1x8 tile for
// k in 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
16966 
// Runs the 1x8c8 AArch64 NEON MLAL Cortex-A53 kernel for k in 1..15,
// n in 1..8 and m in 1..1, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
16986 
// Runs the 1x8c8 AArch64 NEON MLAL Cortex-A53 kernel on a 1x8 tile for
// k in 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
17001 
// Runs the 1x8c8 AArch64 NEON MLAL Cortex-A53 kernel for k in 17..31,
// n in 1..8 and m in 1..1, with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17021 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  // Full 1x8 tile with k stepping through multiples of 16 from 32 to 160.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
17036 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
  // k in multiples of 16 from 32 to 160, combined with every n in 1..8 and
  // m in 1..1; each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17056 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
  // n from 9 through 15 with m fixed at 1; k takes 1, 18, 35, 52, 69.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17073 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
  // Same n (9..15) and k (1..80 step 17) sweep as n_gt_8, with cn_stride(11).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17091 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
  // n from 9 through 15, k from 1 to 80 in steps of 17, m in 1..1;
  // each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17111 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
  // n takes 16 and 24 with m fixed at 1; k takes 1, 18, 35, 52, 69.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17128 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
  // Same n (16, 24) and k (1..80 step 17) sweep as n_div_8, with cn_stride(11).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17146 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
  // n takes 16 and 24, k from 1 to 80 in steps of 17, m in 1..1;
  // each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17166 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
  // Full 1x8 tile with ks(3); k takes 1, 18, 35, 52, 69.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
17182 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
  // ks(3) with k from 1 to 80 in steps of 17, every n in 1..8 and m in 1..1;
  // each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17203 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
  // ks(3) with n from 9 through 15 and k from 1 to 80 in steps of 17; m is 1.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17221 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
  // ks(3) with n taking 16 and 24 and k from 1 to 80 in steps of 17; m is 1.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17239 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
  // cm_stride(11) with k from 1 to 80 in steps of 17, every n in 1..8 and
  // m in 1..1; each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17260 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
  // Full 1x8 tile with ks(3) and a_offset(83); k takes 1, 18, 35, 52, 69.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
17277 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
  // Full 1x8 tile with ks(3) and a_offset(83); zero_index runs over 0..0
  // for each k in 1, 18, 35, 52, 69.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17297 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
  // Single full-tile case (m=1, n=8, k=16) with qmin(128).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
17311 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
  // Single full-tile case (m=1, n=8, k=16) with qmax(128).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
17325 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
  // Single full-tile case (m=1, n=8, k=16) with cm_stride(11).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
17339 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
17340 
17341 
17342 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  // Single full-tile case: m=4, n=16, k=8.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
17355 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  // Single full-tile case (m=4, n=16, k=8) with cn_stride(19).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
17369 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  // k fixed at 8; every n in 1..16 paired with every m in 1..4,
  // each combination run with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17387 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  // k fixed at 8 and n at 16; m swept over 1..4 with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
17403 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  // k fixed at 8 and m at 4; n swept over 1..16 with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
17419 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  // Full 4x16 tile with k swept over every value from 1 through 7.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
17434 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  // k from 1 through 7, combined with every n in 1..16 and m in 1..4;
  // each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17454 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  // Full 4x16 tile with k swept over every value from 9 through 15.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
17469 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  // k from 9 through 15, combined with every n in 1..16 and m in 1..4;
  // each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17489 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  // Full 4x16 tile with k stepping through multiples of 8 from 16 to 80.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
17504 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  // k in multiples of 8 from 16 to 80, combined with every n in 1..16 and
  // m in 1..4; each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17524 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
  // n from 17 through 31 with m fixed at 4; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17541 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
  // Same n (17..31) and k (1..40 step 9) sweep as n_gt_16, with cn_stride(19).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17559 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
  // n from 17 through 31, k from 1 to 40 in steps of 9, m in 1..4;
  // each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17579 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
  // n takes 32 and 48 with m fixed at 4; k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17596 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
  // Same n (32, 48) and k (1..40 step 9) sweep as n_div_16, with cn_stride(19).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17614 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
  // n takes 32 and 48, k from 1 to 40 in steps of 9, m in 1..4;
  // each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17634 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
  // Full 4x16 tile with ks(3); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
17650 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
  // ks(3) with k from 1 to 40 in steps of 9, every n in 1..16 and m in 1..4;
  // each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17671 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
  // ks(3) with n from 17 through 31 and k from 1 to 40 in steps of 9; m is 4.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17689 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
  // ks(3) with n taking 32 and 48 and k from 1 to 40 in steps of 9; m is 4.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17707 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  // cm_stride(19) with k from 1 to 40 in steps of 9, every n in 1..16 and
  // m in 1..4; each combination runs with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17728 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
  // Full 4x16 tile with ks(3) and a_offset(163); k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
17745 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
  // Full 4x16 tile with ks(3) and a_offset(163); zero_index runs over 0..3
  // for each k in 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
17765 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  // Single full-tile case (m=4, n=16, k=8) with qmin(128).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
17779 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  // Single full-tile case (m=4, n=16, k=8) with qmax(128).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
17793 
// Full 4x16 tile at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
17807 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
17808 
17809 
17810 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 4x16 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
17823 
// Full 4x16 tile at k = 8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
17837 
// k = 8; sweeps every n in 1..16 and m in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
17855 
// k = 8, n = 16; sweeps m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(mc).n(16).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
17871 
// k = 8, m = 4; sweeps n over 1..16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
17887 
// Full 4x16 tile; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
17902 
// Sweeps k over 1..7, n over 1..16 and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17922 
// Full 4x16 tile; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
17937 
// Sweeps k over 9..15, n over 1..16 and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17957 
// Full 4x16 tile; sweeps k over 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
17972 
// Sweeps k over 16..80 (step 8), n over 1..16 and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
17992 
// m = 4; sweeps n over 17..31 and k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18009 
// m = 4, cn_stride = 19; sweeps n over 17..31 and k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18027 
// Sweeps n over 17..31, k over 1..40 (step 9) and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18047 
// m = 4; runs n = 32 and n = 48, each with k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18064 
// m = 4, cn_stride = 19; runs n = 32 and n = 48, each with k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18082 
// Runs n = 32 and n = 48 with k over 1..40 (step 9) and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18102 
// Full 4x16 tile with ks = 3; sweeps k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
18118 
// ks = 3; sweeps k over 1..40 (step 9), n over 1..16 and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18139 
// m = 4, ks = 3; sweeps n over 17..31 and k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18157 
// m = 4, ks = 3; runs n = 32 and n = 48, each with k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18175 
// cm_stride = 19; sweeps k over 1..40 (step 9), n over 1..16 and m over 1..4,
// one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18196 
// Full 4x16 tile with ks = 3 and a_offset = 163; sweeps k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
18213 
// Full 4x16 tile with ks = 3 and a_offset = 163; sweeps k over 1..40 (step 9)
// and zero_index over 0..3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(16).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18233 
// Full 4x16 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
18247 
// Full 4x16 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
18261 
// Full 4x16 tile at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
18275 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
18276 
18277 
18278 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 4x16 tile (kr = 4) at k = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
18291 
// Full 4x16 tile (kr = 4) at k = 16 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(16)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
18305 
// k = 16; sweeps every n in 1..16 and m in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18323 
// k = 16, n = 16; sweeps m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(4).sr(1)
        .m(mc).n(16).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
18339 
// k = 16, m = 4; sweeps n over 1..16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
18355 
// Full 4x16 tile (kr = 4); sweeps k over 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
18370 
// Sweeps k over 1..15, n over 1..16 and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18390 
// Full 4x16 tile (kr = 4); sweeps k over 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 17; kc < 32; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
18405 
// Sweeps k over 17..31, n over 1..16 and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18425 
// Full 4x16 tile (kr = 4); sweeps k over 32..160 in steps of 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
18440 
// Sweeps k over 32..160 (step 16), n over 1..16 and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18460 
// m = 4; sweeps n over 17..31 and k over 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18477 
// m = 4, cn_stride = 19; sweeps n over 17..31 and k over 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18495 
// Sweeps n over 17..31, k over 1..80 (step 17) and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18515 
// m = 4; runs n = 32 and n = 48, each with k over 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18532 
// m = 4, cn_stride = 19; runs n = 32 and n = 48, each with k over 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
18550 
// Runs n = 32 and n = 48 with k over 1..80 (step 17) and m over 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18570 
// Full 4x16 tile (kr = 4) with ks = 3; sweeps k over 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
18586 
// ks(3) with every n in [1, 16] and m in [1, 4], for k in {1, 18, 35, 52, 69};
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18607 
// ks(3) with n from 17 to 31 (above nr = 16, below 2 * nr) and
// k in {1, 18, 35, 52, 69}; m is fixed at 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
18625 
// ks(3) with n in {32, 48} (multiples of nr = 16) and
// k in {1, 18, 35, 52, 69}; m is fixed at 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
18643 
// cm_stride(19) with every n in [1, 16] and m in [1, 4], for
// k in {1, 18, 35, 52, 69}; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18664 
// Full 4x16 tile with ks(3) and a_offset(331), for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .a_offset(331)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
18681 
// Same setup as the a_offset case (ks(3), a_offset(331)), additionally
// sweeping zero_index over 0..3 for each k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(331)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
18701 
// Full 4x16 tile at k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
18715 
// Full 4x16 tile at k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
18729 
// Full 4x16 tile at k = 16 with cm_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .cm_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
18743 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
18744 
18745 
18746 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 1x8 tile with k = 8 (equal to kr).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
18759 
// Full 1x8 tile with k = 8 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
18773 
// k = 8 with every n in [1, 8]; the m loop covers only m = 1 because mr = 1.
// Each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
18791 
// k = 8, n = 8, sweeping m over [1, mr]; with mr = 1 that is a single pass.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
18807 
// k = 8, m = 1, sweeping n over [1, 8]; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
18823 
// Full 1x8 tile with every k from 1 to 7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
18838 
// k from 1 to 7 crossed with n in [1, 8] and m in [1, 1];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18858 
// Full 1x8 tile with every k from 9 to 15 (above kr = 8, below 2 * kr).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
18873 
// k from 9 to 15 crossed with n in [1, 8] and m in [1, 1];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18893 
// Full 1x8 tile with k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
18908 
// k = 16, 24, ..., 80 crossed with n in [1, 8] and m in [1, 1];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18928 
// n from 9 to 15 (above nr = 8, below 2 * nr) crossed with
// k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
18945 
// n from 9 to 15 crossed with k in {1, 10, 19, 28, 37}, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
18963 
// n from 9 to 15, k in {1, 10, 19, 28, 37}, m in [1, 1];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
18983 
// n in {16, 24} (multiples of nr = 8) crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
19000 
// n in {16, 24} crossed with k in {1, 10, 19, 28, 37}, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
19018 
// n in {16, 24}, k in {1, 10, 19, 28, 37}, m in [1, 1];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19038 
// Full 1x8 tile with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
19054 
// ks(3) with k in {1, 10, 19, 28, 37}, n in [1, 8] and m in [1, 1];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19075 
// ks(3) with n from 9 to 15 crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
19093 
// ks(3) with n in {16, 24} crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
19111 
// cm_stride(11) with k in {1, 10, 19, 28, 37}, n in [1, 8] and m in [1, 1];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19132 
// Full 1x8 tile with ks(3) and a_offset(43), for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
19149 
// Same setup as the a_offset case (ks(3), a_offset(43)), additionally
// setting zero_index; with mr = 1 the only index visited is 0.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
19169 
// Full 1x8 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
19183 
// Full 1x8 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
19197 
// Full 1x8 tile at k = 8 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
19211 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
19212 
19213 
19214 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 3x16 tile with k = 8 (equal to kr).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
19227 
// Full 3x16 tile with k = 8 and cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
19241 
// k = 8 with every n in [1, 16] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
19259 
// k = 8, n = 16, sweeping m over [1, 3]; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
19275 
// k = 8, m = 3, sweeping n over [1, 16]; each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
19291 
// Full 3x16 tile with every k from 1 to 7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
19306 
// k from 1 to 7 crossed with n in [1, 16] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19326 
// Full 3x16 tile with every k from 9 to 15 (above kr = 8, below 2 * kr).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
19341 
// k from 9 to 15 crossed with n in [1, 16] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19361 
// Full 3x16 tile with k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
19376 
// k = 16, 24, ..., 80 crossed with n in [1, 16] and m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19396 
// n from 17 to 31 (above nr = 16, below 2 * nr) crossed with
// k in {1, 10, 19, 28, 37}; m is fixed at 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
19413 
// n from 17 to 31 crossed with k in {1, 10, 19, 28, 37}, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
19431 
// 3x16c8 NEON MULL: n in 17..31, k in {1, 10, 19, 28, 37}, and every m in
// [1, 3]; each combination is run for a single iteration.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19451 
// 3x16c8 NEON MULL: n takes the multiples of 16 in [32, 48] with m = 3 and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(cols).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
19468 
// 3x16c8 NEON MULL: n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 3, with
// cn_stride(19) set on the tester.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(cols).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
19486 
// 3x16c8 NEON MULL: n in {32, 48}, k in {1, 10, 19, 28, 37}, and every m in
// [1, 3]; each combination is run for a single iteration.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19506 
// 3x16c8 NEON MULL: m = 3, n = 16 with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
19522 
// 3x16c8 NEON MULL: ks(3) combined with every (m, n) in [1, 3] x [1, 16] and
// k in {1, 10, 19, 28, 37}; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19543 
// 3x16c8 NEON MULL: n in 17..31 and k in {1, 10, 19, 28, 37} with m = 3 and
// ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(cols).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
19561 
// 3x16c8 NEON MULL: n in {32, 48} and k in {1, 10, 19, 28, 37} with m = 3 and
// ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(cols).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
19579 
// 3x16c8 NEON MULL: cm_stride(19) with every (m, n) in [1, 3] x [1, 16] and
// k in {1, 10, 19, 28, 37}; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19600 
// 3x16c8 NEON MULL: m = 3, n = 16, ks(3) and a_offset(127), for k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(8).sr(1)
      .m(3).n(16).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
19617 
// 3x16c8 NEON MULL: same setup as a_offset (ks(3), a_offset(127)), with
// zero_index set to each value in [0, 3) for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(8).sr(1)
        .m(3).n(16).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
19637 
// 3x16c8 NEON MULL: single m = 3, n = 16, k = 8 run with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
19651 
// 3x16c8 NEON MULL: single m = 3, n = 16, k = 8 run with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
19665 
// 3x16c8 NEON MULL: single m = 3, n = 16, k = 8 run with cm_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(8).sr(1)
    .m(3).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
19679 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
19680 
19681 
19682 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x8c8 NEON MLAL: single run with m = 2, n = 8 and k = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
19695 
// 2x8c8 NEON MLAL: single m = 2, n = 8, k = 16 run with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
19709 
// 2x8c8 NEON MLAL: k fixed at 16; every (m, n) in [1, 2] x [1, 8] is run for
// one iteration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(rows).n(cols).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
19727 
// 2x8c8 NEON MLAL: k = 16 and n = 8 while m takes each value in [1, 2]; one
// iteration per m.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(rows).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
19743 
// 2x8c8 NEON MLAL: k = 16 and m = 2 while n takes each value in [1, 8]; one
// iteration per n.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(cols).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
19759 
// 2x8c8 NEON MLAL: m = 2, n = 8 with every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
19774 
// 2x8c8 NEON MLAL: every k in [1, 15] combined with every (m, n) in
// [1, 2] x [1, 8]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19794 
// 2x8c8 NEON MLAL: m = 2, n = 8 with every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
19809 
// 2x8c8 NEON MLAL: every k in [17, 31] combined with every (m, n) in
// [1, 2] x [1, 8]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19829 
// 2x8c8 NEON MLAL: m = 2, n = 8 with k stepping through 32, 48, ..., 160
// (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
19844 
// 2x8c8 NEON MLAL: k in 32, 48, ..., 160 combined with every (m, n) in
// [1, 2] x [1, 8]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19864 
// 2x8c8 NEON MLAL: n runs over 9..15 (greater than nr = 8) with m = 2 and
// k taking the values 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
19881 
// 2x8c8 NEON MLAL: n in 9..15 and k in {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
19899 
// 2x8c8 NEON MLAL: n in 9..15, k in {1, 18, 35, 52, 69}, and every m in
// [1, 2]; each combination is run for a single iteration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19919 
// 2x8c8 NEON MLAL: n takes the multiples of 8 in [16, 24] with m = 2 and
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
19936 
// 2x8c8 NEON MLAL: n in {16, 24} and k in {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
19954 
// 2x8c8 NEON MLAL: n in {16, 24}, k in {1, 18, 35, 52, 69}, and every m in
// [1, 2]; each combination is run for a single iteration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
19974 
// 2x8c8 NEON MLAL: m = 2, n = 8 with ks(3), for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
19990 
// 2x8c8 NEON MLAL: ks(3) combined with every (m, n) in [1, 2] x [1, 8] and
// k in {1, 18, 35, 52, 69}; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20011 
// 2x8c8 NEON MLAL: n in 9..15 and k in {1, 18, 35, 52, 69} with m = 2 and
// ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
20029 
// 2x8c8 NEON MLAL: n in {16, 24} and k in {1, 18, 35, 52, 69} with m = 2 and
// ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
20047 
// 2x8c8 NEON MLAL: cm_stride(11) with every (m, n) in [1, 2] x [1, 8] and
// k in {1, 18, 35, 52, 69}; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20068 
// 2x8c8 NEON MLAL: m = 2, n = 8, ks(3) and a_offset(163), for k in
// {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
20085 
// 2x8c8 NEON MLAL: same setup as a_offset (ks(3), a_offset(163)), with
// zero_index set to each value in [0, 2) for k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
20105 
// 2x8c8 NEON MLAL: single m = 2, n = 8, k = 16 run with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
20119 
// 2x8c8 NEON MLAL: single m = 2, n = 8, k = 16 run with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
20133 
// 2x8c8 NEON MLAL: single m = 2, n = 8, k = 16 run with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
20147 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
20148 
20149 
20150 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x8c8 NEON MLAL: single run with m = 4, n = 8 and k = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(8).sr(1)
    .m(4).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
20163 
// 4x8c8 NEON MLAL: single m = 4, n = 8, k = 16 run with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(8).sr(1)
    .m(4).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
20177 
// 4x8c8 NEON MLAL: k fixed at 16; every (m, n) in [1, 4] x [1, 8] is run for
// one iteration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(8).sr(1)
        .m(rows).n(cols).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
20195 
// 4x8c8 NEON MLAL: k = 16 and n = 8 while m takes each value in [1, 4]; one
// iteration per m.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(8).sr(1)
      .m(rows).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
20211 
// 4x8c8 NEON MLAL: k = 16 and m = 4 while n takes each value in [1, 8]; one
// iteration per n.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(8).sr(1)
      .m(4).n(cols).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
20227 
// 4x8c8 NEON MLAL: m = 4, n = 8 with every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(8).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
20242 
// 4x8c8 NEON MLAL: every k in [1, 15] combined with every (m, n) in
// [1, 4] x [1, 8]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20262 
// 4x8c8 NEON MLAL: m = 4, n = 8 with every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(8).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
20277 
// 4x8c8 NEON MLAL: every k in [17, 31] combined with every (m, n) in
// [1, 4] x [1, 8]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20297 
// 4x8c8 NEON MLAL IGEMM kernel with m=4, n=8: k takes the multiples of 16
// from 32 through 160.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(8).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
20312 
// 4x8c8 NEON MLAL IGEMM kernel: k over the multiples of 16 in 32..160,
// combined with every n in 1..8 and m in 1..4, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20332 
// 4x8c8 NEON MLAL IGEMM kernel with m=4: n in 9..15 crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(8).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20349 
// 4x8c8 NEON MLAL IGEMM kernel with m=4 and cn_stride(11): n in 9..15 crossed
// with k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(8).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20367 
// 4x8c8 NEON MLAL IGEMM kernel: n in 9..15, k in {1, 18, 35, 52, 69} and
// m in 1..4, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20387 
// 4x8c8 NEON MLAL IGEMM kernel with m=4: n in {16, 24} crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(8).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20404 
// 4x8c8 NEON MLAL IGEMM kernel with m=4 and cn_stride(11): n in {16, 24}
// crossed with k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(8).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20422 
// 4x8c8 NEON MLAL IGEMM kernel: n in {16, 24}, k in {1, 18, 35, 52, 69} and
// m in 1..4, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20442 
// 4x8c8 NEON MLAL IGEMM kernel with m=4, n=8 and ks(3):
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(8).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
20458 
// 4x8c8 NEON MLAL IGEMM kernel with ks(3): k in {1, 18, 35, 52, 69},
// n in 1..8 and m in 1..4, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20479 
// 4x8c8 NEON MLAL IGEMM kernel with m=4 and ks(3): n in 9..15 crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(8).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20497 
// 4x8c8 NEON MLAL IGEMM kernel with m=4 and ks(3): n in {16, 24} crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(8).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20515 
// 4x8c8 NEON MLAL IGEMM kernel with cm_stride(11): k in {1, 18, 35, 52, 69},
// n in 1..8 and m in 1..4, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20536 
// 4x8c8 NEON MLAL IGEMM kernel with m=4, n=8, ks(3) and a_offset(331):
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(8).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(331)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
20553 
// 4x8c8 NEON MLAL IGEMM kernel with m=4, n=8, ks(3) and a_offset(331):
// k in {1, 18, 35, 52, 69} crossed with zero_index values 0..3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(8).sr(1)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(331)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20573 
// 4x8c8 NEON MLAL IGEMM kernel, single run with m=4, n=8, k=16 and qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(8).sr(1)
      .m(4).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
20587 
// 4x8c8 NEON MLAL IGEMM kernel, single run with m=4, n=8, k=16 and qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(8).sr(1)
      .m(4).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
20601 
// 4x8c8 NEON MLAL IGEMM kernel, single run with m=4, n=8, k=16 and cm_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(8).sr(1)
      .m(4).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
20615 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
20616 
20617 
20618 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x16c16 NEON MLAL IGEMM kernel, single run with m=1, n=16, k=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(16).sr(1)
      .m(1).n(16).k(16)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
20631 
// 1x16c16 NEON MLAL IGEMM kernel, single run with m=1, n=16, k=16 and cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(16).sr(1)
      .m(1).n(16).k(16)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
20645 
// 1x16c16 NEON MLAL IGEMM kernel with k=16: n in 1..16 crossed with m in 1..1,
// each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(16).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20663 
// 1x16c16 NEON MLAL IGEMM kernel with n=16, k=16: m in 1..1, run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(16).sr(1)
        .m(mc).n(16).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
20679 
// 1x16c16 NEON MLAL IGEMM kernel with m=1, k=16: n in 1..16, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(16).sr(1)
        .m(1).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
20695 
// 1x16c16 NEON MLAL IGEMM kernel with m=1, n=16: one tester run for each k in 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(16).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
20710 
// 1x16c16 NEON MLAL IGEMM kernel: k in 1..15, n in 1..16 and m in 1..1,
// each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20730 
// 1x16c16 NEON MLAL IGEMM kernel with m=1, n=16: one tester run for each k in 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(16).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
20745 
// 1x16c16 NEON MLAL IGEMM kernel: k in 17..31, n in 1..16 and m in 1..1,
// each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20765 
// 1x16c16 NEON MLAL IGEMM kernel with m=1, n=16: k takes the multiples of 16
// from 32 through 160.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(16).sr(1)
        .m(1).n(16).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
20780 
// 1x16c16 NEON MLAL IGEMM kernel: k over the multiples of 16 in 32..160,
// combined with every n in 1..16 and m in 1..1, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20800 
// 1x16c16 NEON MLAL IGEMM kernel with m=1: n in 17..31 crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(16).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20817 
// 1x16c16 NEON MLAL IGEMM kernel with m=1 and cn_stride(19): n in 17..31
// crossed with k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(16).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20835 
// 1x16c16 NEON MLAL IGEMM kernel: n in 17..31, k in {1, 18, 35, 52, 69} and
// m in 1..1, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20855 
// 1x16c16 NEON MLAL IGEMM kernel with m=1: n in {32, 48} crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(16).sr(1)
          .m(1).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20872 
// 1x16c16 NEON MLAL IGEMM kernel with m=1 and cn_stride(19): n in {32, 48}
// crossed with k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(16).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20890 
// 1x16c16 NEON MLAL IGEMM kernel: n in {32, 48}, k in {1, 18, 35, 52, 69} and
// m in 1..1, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20910 
// 1x16c16 NEON MLAL IGEMM kernel with m=1, n=16 and ks(3):
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(16).sr(1)
        .m(1).n(16).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
20926 
// 1x16c16 NEON MLAL IGEMM kernel with ks(3): k in {1, 18, 35, 52, 69},
// n in 1..16 and m in 1..1, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
20947 
// 1x16c16 NEON MLAL IGEMM kernel with m=1 and ks(3): n in 17..31 crossed with
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(16).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20965 
// 1x16c16 NEON MLAL IGEMM kernel with m=1 and ks(3): n in {32, 48} crossed
// with k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(16).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
20983 
// 1x16c16 NEON MLAL IGEMM kernel with cm_stride(19): k in {1, 18, 35, 52, 69},
// n in 1..16 and m in 1..1, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21004 
// 1x16c16 NEON MLAL IGEMM kernel with m=1, n=16, ks(3) and a_offset(83):
// k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(16).sr(1)
        .m(1).n(16).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}
21021 
// 1x16c16 NEON MLAL IGEMM kernel with m=1, n=16, ks(3) and a_offset(83):
// k in {1, 18, 35, 52, 69} crossed with zero_index value 0 (loop bound is 1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(16).sr(1)
          .m(1).n(16).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
21041 
// 1x16c16 NEON MLAL IGEMM kernel, single run with m=1, n=16, k=16 and qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(16).sr(1)
      .m(1).n(16).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
21055 
// 1x16c16 NEON MLAL IGEMM kernel, single run with m=1, n=16, k=16 and qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(16).sr(1)
      .m(1).n(16).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
21069 
// 1x16c16 NEON MLAL IGEMM kernel, single run with m=1, n=16, k=16 and cm_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(16).sr(1)
      .m(1).n(16).k(16)
      .cm_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
21083 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
21084 
21085 
21086 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
// 1x8c4 NEONDOT IGEMM kernel, single run with m=1, n=8, k=8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
21099 
// 1x8c4 NEONDOT IGEMM kernel, single run with m=1, n=8, k=8 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
}
21113 
// 1x8c4 NEONDOT IGEMM kernel with k=8: n in 1..8 crossed with m in 1..1,
// each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
21131 
// n=8, k=8; sweeps m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(rows).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21147 
// m=1, k=8; sweeps n over 1..8, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21163 
// m=1, n=8; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21178 
// Sweeps k over 1..7, n over 1..8 and m over 1..1, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21198 
// m=1, n=8; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21213 
// Sweeps k over 9..15, n over 1..8 and m over 1..1, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21233 
// m=1, n=8; sweeps k over 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21248 
// Sweeps k over 16..80 (step 8), n over 1..8 and m over 1..1, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21268 
// m=1; sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21285 
// m=1, cn_stride=11; sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21303 
// Sweeps n over 9..15, k over 1..40 (step 9) and m over 1..1, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21323 
// m=1; sweeps n over 16..24 (step 8) and k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21340 
// m=1, cn_stride=11; sweeps n over 16..24 (step 8) and k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21358 
// Sweeps n over 16..24 (step 8), k over 1..40 (step 9) and m over 1..1, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21378 
// m=1, n=8, ks=3; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21394 
// ks=3; sweeps k over 1..40 (step 9), n over 1..8 and m over 1..1, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21415 
// m=1, ks=3; sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21433 
// m=1, ks=3; sweeps n over 16..24 (step 8) and k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21451 
// cm_stride=11; sweeps k over 1..40 (step 9), n over 1..8 and m over 1..1, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21472 
// m=1, n=8, ks=3, a_offset=43; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(depth)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21489 
// m=1, n=8, ks=3, a_offset=43; sweeps k over 1..40 (step 9) and zero_index over 0..0.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21509 
// m=1, n=8, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
21523 
// m=1, n=8, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
21537 
// m=1, n=8, k=8 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
21551 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
21552 
21553 
21554 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
// Single problem with m=6, n=8, k=8.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(4).sr(1)
    .m(6).n(8).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
21567 
// m=6, n=8, k=8 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(4).sr(1)
    .m(6).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
21581 
// k=8; sweeps n over 1..8 and m over 1..6, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 6; ++rows) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21599 
// n=8, k=8; sweeps m over 1..6, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 6; ++rows) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(rows).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21615 
// m=6, k=8; sweeps n over 1..8, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21631 
// m=6, n=8; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21646 
// Sweeps k over 1..7, n over 1..8 and m over 1..6, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21666 
// m=6, n=8; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21681 
// Sweeps k over 9..15, n over 1..8 and m over 1..6, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21701 
// m=6, n=8; sweeps k over 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21716 
// Sweeps k over 16..80 (step 8), n over 1..8 and m over 1..6, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21736 
// m=6; sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21753 
// m=6, cn_stride=11; sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21771 
// Sweeps n over 9..15, k over 1..40 (step 9) and m over 1..6, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21791 
// m=6; sweeps n over 16..24 (step 8) and k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21808 
// m=6, cn_stride=11; sweeps n over 16..24 (step 8) and k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21826 
// Sweeps n over 16..24 (step 8), k over 1..40 (step 9) and m over 1..6, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21846 
// m=6, n=8, ks=3; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21862 
// ks=3; sweeps k over 1..40 (step 9), n over 1..8 and m over 1..6, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21883 
// m=6, ks=3; sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21901 
// m=6, ks=3; sweeps n over 16..24 (step 8) and k over 1..40 (step 9).
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21919 
// cm_stride=11; sweeps k over 1..40 (step 9), n over 1..8 and m over 1..6, with iterations(1) each.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 6; ++rows) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21940 
// m=6, n=8, ks=3, a_offset=251; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(depth)
      .ks(3)
      .a_offset(251)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
21957 
// m=6, n=8, ks=3, a_offset=251; sweeps k over 1..40 (step 9) and zero_index over 0..5.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(depth)
        .ks(3)
        .a_offset(251)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
21977 
// m=6, n=8, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(4).sr(1)
    .m(6).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
21991 
// Single configuration: m=6, n=8, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
    .mr(6).nr(8).kr(4).sr(1)
    .m(6).n(8).k(8)
    .qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
22005 
// Single configuration: m=6, n=8, k=8 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
    .mr(6).nr(8).kr(4).sr(1)
    .m(6).n(8).k(8)
    .cm_stride(11);
  tester.Test(
    xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
22019 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
22020 
22021 
22022 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
// Single configuration: m=8, n=8, k=8 with mr=8, nr=8, kr=4, sr=1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8);
  tester.Test(
    xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
22035 
// Single configuration: m=8, n=8, k=8 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .cn_stride(11);
  tester.Test(
    xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
22049 
// k=8; sweeps n over 1..8 (outer) and m over 1..8 (inner), iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
      auto tester = GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22067 
// k=8, n=8; sweeps m over 1..8, iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(m_dim).n(8).k(8)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22083 
// k=8, m=8; sweeps n over 1..8, iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(n_dim).k(8)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22099 
// Full 8x8 tile; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_dim);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22114 
// Sweeps k over 1..7, n over 1..8 and m over 1..8 (nested in that order),
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22134 
// Full 8x8 tile; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_dim);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22149 
// Sweeps k over 9..15, n over 1..8 and m over 1..8 (nested in that order),
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22169 
// Full 8x8 tile; k takes every multiple of 8 from 16 through 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_dim);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22184 
// k takes every multiple of 8 from 16 through 80; for each k, n sweeps 1..8
// and m sweeps 1..8, iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22204 
// m=8; sweeps n over 9..15 and, for each n, k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_dim).k(k_dim);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22221 
// m=8, cn_stride=11; sweeps n over 9..15 and, for each n, k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_dim).k(k_dim)
        .cn_stride(11);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22239 
// Sweeps n over 9..15, k over {1, 10, 19, 28, 37} and m over 1..8 (nested in
// that order), iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22259 
// m=8; n takes the values 16 and 24 and, for each n, k sweeps
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_dim).k(k_dim);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22276 
// m=8, cn_stride=11; n takes the values 16 and 24 and, for each n, k sweeps
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_dim).k(k_dim)
        .cn_stride(11);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22294 
// n takes the values 16 and 24; for each n, k sweeps {1, 10, 19, 28, 37} and
// m sweeps 1..8, iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22314 
// Full 8x8 tile with ks=3; k takes the values 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_dim)
      .ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22330 
// ks=3; k sweeps {1, 10, 19, 28, 37}, and for each k, n sweeps 1..8 and m
// sweeps 1..8, iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22351 
// m=8, ks=3; sweeps n over 9..15 and, for each n, k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_dim).k(k_dim)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22369 
// m=8, ks=3; n takes the values 16 and 24 and, for each n, k sweeps
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n_dim).k(k_dim)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22387 
// cm_stride=11; k sweeps {1, 10, 19, 28, 37}, and for each k, n sweeps 1..8
// and m sweeps 1..8, iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22408 
// Full 8x8 tile with ks=3 and a_offset=331; k takes the values 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k_dim)
      .ks(3).a_offset(331);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22425 
// Full 8x8 tile with ks=3 and a_offset=331; for k in {1, 10, 19, 28, 37} the
// zero_index is swept over 0..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t mz_idx = 0; mz_idx < 8; ++mz_idx) {
      auto tester = GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(8).k(k_dim)
        .ks(3).a_offset(331).zero_index(mz_idx);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22445 
// Single configuration: m=8, n=8, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .qmin(128);
  tester.Test(
    xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
22459 
// Single configuration: m=8, n=8, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .qmax(128);
  tester.Test(
    xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
22473 
// Single configuration: m=8, n=8, k=8 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester()
    .mr(8).nr(8).kr(4).sr(1)
    .m(8).n(8).k(8)
    .cm_stride(11);
  tester.Test(
    xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
22487 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
22488 
22489 
22490 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: m=1, n=8, k=8 with mr=1, nr=8, kr=1, sr=1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8);
  tester.Test(
    xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
22503 
// Single configuration: m=1, n=8, k=8 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11);
  tester.Test(
    xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
    xnn_init_qs8_conv_minmax_rndnu_neon_params,
    xnn_qs8_requantize_rndnu);
}
22517 
// k=8; sweeps n over 1..8 with an inner m loop that only visits m=1,
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22535 
// k=8, n=8; the m loop only visits m=1, iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(8)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22551 
// k=8, m=1; sweeps n over 1..8, iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_dim).k(8)
      .iterations(1);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22567 
// m=1, n=8; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22582 
// Sweeps k over 1..7 and n over 1..8; the innermost m loop only visits m=1.
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22602 
// m=1, n=8; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22617 
// Sweeps k over 9..15 and n over 1..8; the innermost m loop only visits m=1.
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22637 
// m=1, n=8; k takes every multiple of 8 from 16 through 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22652 
// k takes every multiple of 8 from 16 through 80; for each k, n sweeps 1..8
// and the innermost m loop only visits m=1. iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22672 
// m=1; sweeps n over 9..15 and, for each n, k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22689 
// m=1, cn_stride=11; sweeps n over 9..15 and, for each n, k over
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22707 
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37}; the innermost m loop
// only visits m=1. iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22727 
// m=1; n takes the values 16 and 24 and, for each n, k sweeps
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22744 
// m=1, cn_stride=11; n takes the values 16 and 24 and, for each n, k sweeps
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22762 
// n takes the values 16 and 24; for each n, k sweeps {1, 10, 19, 28, 37} and
// the innermost m loop only visits m=1. iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22782 
// m=1, n=8 with ks=3; k takes the values 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3);
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }
}
22798 
// ks=3; k sweeps {1, 10, 19, 28, 37} and n sweeps 1..8; the innermost m loop
// only visits m=1. iterations set to 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3).iterations(1);
        tester.Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22819 
// m=1, ks=3; sweeps n over 9..15 and, for each n, k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22837 
// m=1, ks=3; n takes the values 16 and 24 and, for each n, k sweeps
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3);
      tester.Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
    }
  }
}
22855 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
  // 1x8 microkernel with cm_stride(11): k in {1, 10, 19, 28, 37}, n in 1..8,
  // m = 1, one iteration per configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
22876 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, a_offset) {
  // 1x8 microkernel with ks(3) and a_offset(43): k in {1, 10, 19, 28, 37},
  // m = 1, n = 8.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
22893 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, zero) {
  // 1x8 microkernel with ks(3), a_offset(43) and zero_index 0:
  // k in {1, 10, 19, 28, 37}, m = 1, n = 8.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_val)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
22913 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmin) {
  // 1x8 microkernel at m = 1, n = 8, k = 8 with qmin(128).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
22927 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmax) {
  // 1x8 microkernel at m = 1, n = 8, k = 8 with qmax(128).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
22941 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm) {
  // 1x8 microkernel at m = 1, n = 8, k = 8 with cm_stride(11).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
22955 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
22956 
22957 
22958 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8) {
  // 1x16 microkernel at m = 1, n = 16, k = 8.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
22971 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cn) {
  // 1x16 microkernel at m = 1, n = 16, k = 8 with cn_stride(19).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
22985 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  // 1x16 microkernel at k = 8: n in 1..16, m = 1, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23003 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  // 1x16 microkernel at n = 16, k = 8: m = 1 only, one iteration.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(m_val).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23019 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  // 1x16 microkernel at m = 1, k = 8: n in 1..16, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23035 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8) {
  // 1x16 microkernel at m = 1, n = 16: k in 1..7.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23050 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  // 1x16 microkernel: k in 1..7, n in 1..16, m = 1, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23070 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8) {
  // 1x16 microkernel at m = 1, n = 16: k in 9..15.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23085 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  // 1x16 microkernel: k in 9..15, n in 1..16, m = 1, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23105 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8) {
  // 1x16 microkernel at m = 1, n = 16: k in {16, 24, ..., 80}.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23120 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
  // 1x16 microkernel: k in {16, 24, ..., 80}, n in 1..16, m = 1,
  // one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23140 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16) {
  // 1x16 microkernel at m = 1: n in 17..31, k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23157 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  // 1x16 microkernel at m = 1 with cn_stride(19): n in 17..31,
  // k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23175 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  // 1x16 microkernel: n in 17..31, k in {1, 10, 19, 28, 37}, m = 1,
  // one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23195 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16) {
  // 1x16 microkernel at m = 1: n in {32, 48}, k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23212 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  // 1x16 microkernel at m = 1 with cn_stride(19): n in {32, 48},
  // k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23230 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
  // 1x16 microkernel: n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 1,
  // one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23250 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, small_kernel) {
  // 1x16 microkernel at m = 1, n = 16 with ks(3): k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23266 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
  // 1x16 microkernel with ks(3): k in {1, 10, 19, 28, 37}, n in 1..16, m = 1,
  // one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23287 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  // 1x16 microkernel at m = 1 with ks(3): n in 17..31,
  // k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23305 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  // 1x16 microkernel at m = 1 with ks(3): n in {32, 48},
  // k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23323 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
  // 1x16 microkernel with cm_stride(19): k in {1, 10, 19, 28, 37},
  // n in 1..16, m = 1, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23344 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, a_offset) {
  // 1x16 microkernel at m = 1, n = 16 with ks(3) and a_offset(43):
  // k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_val)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23361 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, zero) {
  // 1x16 microkernel at m = 1, n = 16 with ks(3), a_offset(43) and
  // zero_index 0: k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_val)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23381 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmin) {
  // 1x16 microkernel at m = 1, n = 16, k = 8 with qmin(128).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
23395 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmax) {
  // 1x16 microkernel at m = 1, n = 16, k = 8 with qmax(128).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
23409 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm) {
  // 1x16 microkernel at m = 1, n = 16, k = 8 with cm_stride(19).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
23423 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
23424 
23425 
23426 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8) {
  // 3x16 microkernel at m = 3, n = 16, k = 8.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
23439 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cn) {
  // 3x16 microkernel at m = 3, n = 16, k = 8 with cn_stride(19).
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
23453 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  // 3x16 microkernel at k = 8: n in 1..16, m in 1..3, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23471 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  // 3x16 microkernel at n = 16, k = 8: m in 1..3, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(m_val).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23487 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  // 3x16 microkernel at m = 3, k = 8: n in 1..16, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23503 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8) {
  // 3x16 microkernel at m = 3, n = 16: k in 1..7.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23518 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  // 3x16 microkernel: k in 1..7, n in 1..16, m in 1..3, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23538 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8) {
  // 3x16 microkernel at m = 3, n = 16: k in 9..15.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23553 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  // 3x16 microkernel: k in 9..15, n in 1..16, m in 1..3, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23573 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8) {
  // 3x16 microkernel at m = 3, n = 16: k in {16, 24, ..., 80}.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
23588 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
  // 3x16 microkernel: k in {16, 24, ..., 80}, n in 1..16, m in 1..3,
  // one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23608 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16) {
  // 3x16 microkernel at m = 3: n in 17..31, k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23625 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  // 3x16 microkernel at m = 3 with cn_stride(19): n in 17..31,
  // k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23643 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  // 3x16 microkernel: n in 17..31, k in {1, 10, 19, 28, 37}, m in 1..3,
  // one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23663 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16) {
  // 3x16 microkernel at m = 3: n in {32, 48}, k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23680 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  // 3x16 microkernel at m = 3 with cn_stride(19): n in {32, 48},
  // k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
23698 
// 3x16 kernel: n in {32, 48}, k in {1, 10, 19, 28, 37}, every m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23718 
// 3x16 kernel: full 3x16 tile with ks set to 3, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
23734 
// 3x16 kernel: ks set to 3, k in {1, 10, 19, 28, 37}, every n in [1, 16] and
// every m in [1, 3]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23755 
// 3x16 kernel: ks set to 3, n in [17, 31], k in {1, 10, 19, 28, 37},
// m fixed at 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
23773 
// 3x16 kernel: ks set to 3, n in {32, 48}, k in {1, 10, 19, 28, 37},
// m fixed at 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
23791 
// 3x16 kernel: cm_stride set to 19, k in {1, 10, 19, 28, 37}, every n in
// [1, 16] and every m in [1, 3]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
23812 
// 3x16 kernel: full tile with ks set to 3 and a_offset set to 127,
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_size)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
23829 
// 3x16 kernel: full tile with ks set to 3 and a_offset set to 127,
// k in {1, 10, 19, 28, 37}, and zero_index taking each value in [0, 2].
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(16).k(k_size)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
23849 
// 3x16 kernel: full tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
23863 
// 3x16 kernel: full tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
23877 
// 3x16 kernel: full tile at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
23891 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
23892 
23893 
23894 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x16 PRFM kernel: one full 1x16 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
23907 
// 1x16 PRFM kernel: one full 1x16 tile at k = 8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
23921 
// 1x16 PRFM kernel: k = 8, every n in [1, 16]; the m loop covers only m = 1.
// Each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
23939 
// 1x16 PRFM kernel: k = 8, n = 16; the m loop covers only m = 1.
// Run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_size).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
23955 
// 1x16 PRFM kernel: k = 8, m = 1, every n in [1, 16]; run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
23971 
// 1x16 PRFM kernel: full 1x16 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
23986 
// 1x16 PRFM kernel: every k in [1, 7] and every n in [1, 16]; the m loop
// covers only m = 1. Each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24006 
// 1x16 PRFM kernel: full 1x16 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24021 
// 1x16 PRFM kernel: every k in [9, 15] and every n in [1, 16]; the m loop
// covers only m = 1. Each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24041 
// 1x16 PRFM kernel: full 1x16 tile for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24056 
// 1x16 PRFM kernel: k = 16, 24, ..., 80 (step 8) and every n in [1, 16]; the
// m loop covers only m = 1. Each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24076 
// 1x16 PRFM kernel: n in [17, 31], k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24093 
// 1x16 PRFM kernel: n in [17, 31], k in {1, 10, 19, 28, 37}, m fixed at 1,
// with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24111 
// 1x16 PRFM kernel: n in [17, 31], k in {1, 10, 19, 28, 37}; the m loop
// covers only m = 1. Each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24131 
// 1x16 PRFM kernel: n in {32, 48} (multiples of nr = 16),
// k in {1, 10, 19, 28, 37}, m fixed at 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24148 
// 1x16 PRFM kernel: n in {32, 48}, k in {1, 10, 19, 28, 37}, m fixed at 1,
// with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24166 
// 1x16 PRFM kernel: n in {32, 48}, k in {1, 10, 19, 28, 37}; the m loop
// covers only m = 1. Each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24186 
// 1x16 PRFM kernel: full 1x16 tile with ks set to 3,
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24202 
// 1x16 PRFM kernel: ks set to 3, k in {1, 10, 19, 28, 37}, every n in
// [1, 16]; the m loop covers only m = 1. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24223 
// 1x16 PRFM kernel: ks set to 3, n in [17, 31], k in {1, 10, 19, 28, 37},
// m fixed at 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24241 
// 1x16 PRFM kernel: ks set to 3, n in {32, 48}, k in {1, 10, 19, 28, 37},
// m fixed at 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24259 
// 1x16 PRFM kernel: cm_stride set to 19, k in {1, 10, 19, 28, 37}, every n
// in [1, 16]; the m loop covers only m = 1. Each configuration is run with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24280 
// 1x16 PRFM kernel: full tile with ks set to 3 and a_offset set to 43,
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24297 
// 1x16 PRFM kernel: full tile with ks set to 3 and a_offset set to 43,
// k in {1, 10, 19, 28, 37}; the zero_index loop covers only index 0.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(16).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24317 
// 1x16 PRFM kernel: full tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
24331 
// 1x16 PRFM kernel: full tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
24345 
// 1x16 PRFM kernel: full tile at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
24359 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
24360 
24361 
24362 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 6x16 PRFM kernel: one full 6x16 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
24375 
// 6x16 PRFM kernel: one full 6x16 tile at k = 8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
24389 
// 6x16 PRFM kernel: k = 8, every n in [1, 16] and every m in [1, 6];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24407 
// 6x16 PRFM kernel: k = 8, n = 16, every m in [1, 6]; run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(6).nr(16).kr(1).sr(1)
        .m(m_size).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24423 
// 6x16 PRFM kernel: k = 8, m = 6, every n in [1, 16]; run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24439 
// 6x16 PRFM kernel: full 6x16 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24454 
// 6x16 PRFM kernel: every k in [1, 7], every n in [1, 16] and every m in
// [1, 6]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24474 
// 6x16 PRFM kernel: full 6x16 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24489 
// 6x16 PRFM kernel: every k in [9, 15], every n in [1, 16] and every m in
// [1, 6]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24509 
// 6x16 PRFM kernel: full 6x16 tile for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24524 
// For each k in {16, 24, ..., 80}, runs every (m, n) combination with m in 1..6
// and n in 1..16, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24544 
// n swept over 17..31 (strictly between 16 and 32) with m = 6 and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc <= 31; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24561 
// Same sweep as n_gt_16 (n in 17..31, k in {1, 10, 19, 28, 37}, m = 6), but with
// cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc <= 31; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24579 
// n in 17..31 crossed with k in {1, 10, 19, 28, 37} and every m in 1..6,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc <= 31; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24599 
// n taking the multiples of 16 in {32, 48} with m = 6 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24616 
// Same sweep as n_div_16 (n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 6), but
// with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24634 
// n in {32, 48} crossed with k in {1, 10, 19, 28, 37} and every m in 1..6,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24654 
// Full 6x16 problem with ks set to 3 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24670 
// ks = 3 with k in {1, 10, 19, 28, 37} and every (m, n) combination for
// m in 1..6 and n in 1..16; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24691 
// ks = 3 combined with n in 17..31, m = 6 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc <= 31; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24709 
// ks = 3 combined with n in {32, 48}, m = 6 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24727 
// cm_stride = 19 with k in {1, 10, 19, 28, 37} and every (m, n) combination for
// m in 1..6 and n in 1..16; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24748 
// Full 6x16 problem with ks = 3 and a_offset = 251, for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .ks(3)
        .a_offset(251)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24765 
// ks = 3 and a_offset = 251 with zero_index stepped through 0..5, for each
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(16).k(kc)
          .ks(3)
          .a_offset(251)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24785 
// Single full 6x16 run at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
24799 
// Single full 6x16 run at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
24813 
// Single full 6x16 run at k = 8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
24827 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
24828 
24829 
24830 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full 2x8 run with k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
24843 
// Single full 2x8 run at k = 8 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
24857 
// k fixed at 8; runs every (m, n) combination with m in 1..2 and n in 1..8,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
24875 
// k = 8 and n = 8 held fixed while m steps through 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24891 
// k = 8 and m = 2 held fixed while n steps through 1..8; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24907 
// Full 2x8 problem with k swept over 1..7 (every value below 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24922 
// For k in 1..7, runs every (m, n) combination with m in 1..2 and n in 1..8,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24942 
// Full 2x8 problem with k swept over 9..15 (every value strictly between 8 and 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24957 
// For k in 9..15, runs every (m, n) combination with m in 1..2 and n in 1..8,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
24977 
// Full 2x8 problem with k taking the multiples of 8 from 16 through 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
24992 
// For each k in {16, 24, ..., 80}, runs every (m, n) combination with m in 1..2
// and n in 1..8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25012 
// n swept over 9..15 (strictly between 8 and 16) with m = 2 and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
25029 
// Same sweep as n_gt_8 (n in 9..15, k in {1, 10, 19, 28, 37}, m = 2), but with
// cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
25047 
// n in 9..15 crossed with k in {1, 10, 19, 28, 37} and every m in 1..2,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25067 
// n taking the multiples of 8 in {16, 24} with m = 2 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
25084 
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 10, 19, 28, 37}, m = 2), but
// with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
25102 
// n in {16, 24} crossed with k in {1, 10, 19, 28, 37} and every m in 1..2,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25122 
// Full 2x8 problem with ks set to 3 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
25138 
// ks = 3 with k in {1, 10, 19, 28, 37} and every (m, n) combination for
// m in 1..2 and n in 1..8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25159 
// ks = 3 combined with n in 9..15, m = 2 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
25177 
// ks = 3 combined with n in {16, 24}, m = 2 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
25195 
// cm_stride = 11 with k in {1, 10, 19, 28, 37} and every (m, n) combination for
// m in 1..2 and n in 1..8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25216 
// Full 2x8 problem with ks = 3 and a_offset = 83, for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
25233 
// ks = 3 and a_offset = 83 with zero_index stepped through 0..1, for each
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
25253 
// Single full 2x8 run at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
25267 
// Single full 2x8 run at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
25281 
// Single full 2x8 run at k = 8 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
25295 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
25296 
25297 
25298 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full 3x8 run with k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
25311 
// Single full 3x8 run at k = 8 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}
25325 
// k fixed at 8; runs every (m, n) combination with m in 1..3 and n in 1..8,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
25343 
// k = 8 and n = 8 held fixed while m steps through 1..3; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
25359 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
  // Varies only n over [1, 8]; m stays at 3 and k at 8.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
25375 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8) {
  // Runs m=3, n=8 for every k in [1, 7].
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
25390 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
  // For every k in [1, 7], sweeps n over [1, 8] and m over [1, 3],
  // with iterations set to 1 for each configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25410 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8) {
  // Runs m=3, n=8 for every k in [9, 15].
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
25425 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
  // For every k in [9, 15], sweeps n over [1, 8] and m over [1, 3],
  // with iterations set to 1 for each configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25445 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8) {
  // Runs m=3, n=8 for k = 16, 24, ..., 80 (multiples of 8).
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
25460 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
  // For k = 16, 24, ..., 80, sweeps n over [1, 8] and m over [1, 3],
  // with iterations set to 1 for each configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25480 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8) {
  // Sweeps n over [9, 15] (above nr) with m=3 and k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
25497 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
  // Sweeps n over [9, 15] with m=3 and k in {1, 10, 19, 28, 37},
  // with cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
25515 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
  // Sweeps n over [9, 15], k in {1, 10, 19, 28, 37} and m over [1, 3],
  // with iterations set to 1 for each configuration.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25535 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8) {
  // Runs n = 16 and n = 24 (multiples of nr) with m=3 and
  // k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
25552 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
  // Runs n = 16 and n = 24 with m=3 and k in {1, 10, 19, 28, 37},
  // with cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
25570 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
  // Runs n = 16 and n = 24, k in {1, 10, 19, 28, 37} and m over [1, 3],
  // with iterations set to 1 for each configuration.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25590 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, small_kernel) {
  // Runs m=3, n=8 with ks set to 3 for k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
25606 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
  // With ks set to 3, sweeps k in {1, 10, 19, 28, 37}, n over [1, 8] and
  // m over [1, 3], with iterations set to 1 for each configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25627 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
  // With ks set to 3, sweeps n over [9, 15] and k in {1, 10, 19, 28, 37}
  // at m=3.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
25645 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
  // With ks set to 3, runs n = 16 and n = 24 for k in {1, 10, 19, 28, 37}
  // at m=3.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
25663 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
  // With cm_stride set to 11, sweeps k in {1, 10, 19, 28, 37}, n over [1, 8]
  // and m over [1, 3], with iterations set to 1 for each configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}
25684 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, a_offset) {
  // Runs m=3, n=8 with ks set to 3 and a_offset set to 127 for
  // k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}
25701 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, zero) {
  // Same ks=3 / a_offset=127 setup, additionally stepping zero_index
  // through 0, 1 and 2 for each k in {1, 10, 19, 28, 37}.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}
25721 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmin) {
  // Single m=3, n=8, k=8 run with qmin set to 128.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
25735 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmax) {
  // Single m=3, n=8, k=8 run with qmax set to 128.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
25749 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm) {
  // Single m=3, n=8, k=8 run with cm_stride set to 11.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}
25763 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
25764