1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/qu8-igemm-minmax-rndnu.yaml 11 // Generator: tools/generate-gemm-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/allocator.h> 17 #include <xnnpack/common.h> 18 #include <xnnpack/isa-checks.h> 19 #include <xnnpack/microparams-init.h> 20 21 #include <xnnpack/gemm.h> 22 #include <xnnpack/igemm.h> 23 #include <xnnpack/ppmm.h> 24 #include "gemm-microkernel-tester.h" 25 26 27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8)28 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8) { 29 TEST_REQUIRES_ARM_NEON; 30 GemmMicrokernelTester() 31 .mr(1) 32 .nr(8) 33 .kr(1) 34 .sr(1) 35 .m(1) 36 .n(8) 37 .k(8) 38 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 39 } 40 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,strided_cn)41 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cn) { 42 TEST_REQUIRES_ARM_NEON; 43 GemmMicrokernelTester() 44 .mr(1) 45 .nr(8) 46 .kr(1) 47 .sr(1) 48 .m(1) 49 .n(8) 50 .k(8) 51 .cn_stride(11) 52 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 53 } 54 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8_subtile)55 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile) { 56 TEST_REQUIRES_ARM_NEON; 57 for (uint32_t n = 1; n <= 8; n++) { 58 for (uint32_t m = 1; m <= 1; m++) { 59 GemmMicrokernelTester() 60 .mr(1) 61 .nr(8) 62 .kr(1) 63 .sr(1) 64 
.m(m) 65 .n(n) 66 .k(8) 67 .iterations(1) 68 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 69 } 70 } 71 } 72 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8_subtile_m)73 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_m) { 74 TEST_REQUIRES_ARM_NEON; 75 for (uint32_t m = 1; m <= 1; m++) { 76 GemmMicrokernelTester() 77 .mr(1) 78 .nr(8) 79 .kr(1) 80 .sr(1) 81 .m(m) 82 .n(8) 83 .k(8) 84 .iterations(1) 85 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 86 } 87 } 88 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8_subtile_n)89 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_n) { 90 TEST_REQUIRES_ARM_NEON; 91 for (uint32_t n = 1; n <= 8; n++) { 92 GemmMicrokernelTester() 93 .mr(1) 94 .nr(8) 95 .kr(1) 96 .sr(1) 97 .m(1) 98 .n(n) 99 .k(8) 100 .iterations(1) 101 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 102 } 103 } 104 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_lt_8)105 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8) { 106 TEST_REQUIRES_ARM_NEON; 107 for (size_t k = 1; k < 8; k++) { 108 GemmMicrokernelTester() 109 .mr(1) 110 .nr(8) 111 .kr(1) 112 .sr(1) 113 .m(1) 114 .n(8) 115 .k(k) 116 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 117 } 118 } 119 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_lt_8_subtile)120 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8_subtile) { 121 TEST_REQUIRES_ARM_NEON; 122 for (size_t k = 1; k < 8; k++) { 123 for (uint32_t 
n = 1; n <= 8; n++) { 124 for (uint32_t m = 1; m <= 1; m++) { 125 GemmMicrokernelTester() 126 .mr(1) 127 .nr(8) 128 .kr(1) 129 .sr(1) 130 .m(m) 131 .n(n) 132 .k(k) 133 .iterations(1) 134 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 135 } 136 } 137 } 138 } 139 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_gt_8)140 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8) { 141 TEST_REQUIRES_ARM_NEON; 142 for (size_t k = 9; k < 16; k++) { 143 GemmMicrokernelTester() 144 .mr(1) 145 .nr(8) 146 .kr(1) 147 .sr(1) 148 .m(1) 149 .n(8) 150 .k(k) 151 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 152 } 153 } 154 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_gt_8_subtile)155 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8_subtile) { 156 TEST_REQUIRES_ARM_NEON; 157 for (size_t k = 9; k < 16; k++) { 158 for (uint32_t n = 1; n <= 8; n++) { 159 for (uint32_t m = 1; m <= 1; m++) { 160 GemmMicrokernelTester() 161 .mr(1) 162 .nr(8) 163 .kr(1) 164 .sr(1) 165 .m(m) 166 .n(n) 167 .k(k) 168 .iterations(1) 169 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 170 } 171 } 172 } 173 } 174 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_div_8)175 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8) { 176 TEST_REQUIRES_ARM_NEON; 177 for (size_t k = 16; k <= 80; k += 8) { 178 GemmMicrokernelTester() 179 .mr(1) 180 .nr(8) 181 .kr(1) 182 .sr(1) 183 .m(1) 184 .n(8) 185 .k(k) 186 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 187 } 188 } 189 
// Tests in this span pass
// xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7
// to GemmMicrokernelTester configured with mr = 1, nr = 8, kr = 1, sr = 1.

// Every k = 16, 24, ..., 80, n in [1, 8], m in [1, 1]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 1 for every n in [9, 15] and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8 with cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Every n in [9, 15], k = 1, 10, ..., 37, m in [1, 1]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 1 for n = 16, 24 and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8 with cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Every n = 16, 24, k = 1, 10, ..., 37, m in [1, 1]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 1, n = 8, ks = 3 for k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks = 3 for every k = 1, 10, ..., 37, n in [1, 8], m in [1, 1]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 1, ks = 3 for every n in [9, 15] and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_div_8_small_kernel)375 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_small_kernel) { 376 TEST_REQUIRES_ARM_NEON; 377 for (uint32_t n = 16; n <= 24; n += 8) { 378 for (size_t k = 1; k <= 40; k += 9) { 379 GemmMicrokernelTester() 380 .mr(1) 381 .nr(8) 382 .kr(1) 383 .sr(1) 384 .m(1) 385 .n(n) 386 .k(k) 387 .ks(3) 388 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 389 } 390 } 391 } 392 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,strided_cm_subtile)393 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm_subtile) { 394 TEST_REQUIRES_ARM_NEON; 395 for (size_t k = 1; k <= 40; k += 9) { 396 for (uint32_t n = 1; n <= 8; n++) { 397 for (uint32_t m = 1; m <= 1; m++) { 398 GemmMicrokernelTester() 399 .mr(1) 400 .nr(8) 401 .kr(1) 402 .sr(1) 403 .m(m) 404 .n(n) 405 .k(k) 406 .cm_stride(11) 407 .iterations(1) 408 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 409 } 410 } 411 } 412 } 413 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,a_offset)414 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, a_offset) { 415 TEST_REQUIRES_ARM_NEON; 416 for (size_t k = 1; k <= 40; k += 9) { 417 GemmMicrokernelTester() 418 .mr(1) 419 .nr(8) 420 .kr(1) 421 .sr(1) 422 .m(1) 423 .n(8) 424 .k(k) 425 .ks(3) 426 .a_offset(43) 427 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 428 } 429 } 430 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,zero)431 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, zero) { 432 TEST_REQUIRES_ARM_NEON; 433 for (size_t k = 1; k <= 40; k += 9) { 434 for 
(uint32_t mz = 0; mz < 1; mz++) { 435 GemmMicrokernelTester() 436 .mr(1) 437 .nr(8) 438 .kr(1) 439 .sr(1) 440 .m(1) 441 .n(8) 442 .k(k) 443 .ks(3) 444 .a_offset(43) 445 .zero_index(mz) 446 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 447 } 448 } 449 } 450 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,qmin)451 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmin) { 452 TEST_REQUIRES_ARM_NEON; 453 GemmMicrokernelTester() 454 .mr(1) 455 .nr(8) 456 .kr(1) 457 .sr(1) 458 .m(1) 459 .n(8) 460 .k(8) 461 .qmin(128) 462 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 463 } 464 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,qmax)465 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmax) { 466 TEST_REQUIRES_ARM_NEON; 467 GemmMicrokernelTester() 468 .mr(1) 469 .nr(8) 470 .kr(1) 471 .sr(1) 472 .m(1) 473 .n(8) 474 .k(8) 475 .qmax(128) 476 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 477 } 478 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,strided_cm)479 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm) { 480 TEST_REQUIRES_ARM_NEON; 481 GemmMicrokernelTester() 482 .mr(1) 483 .nr(8) 484 .kr(1) 485 .sr(1) 486 .m(1) 487 .n(8) 488 .k(8) 489 .cm_stride(11) 490 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 491 } 492 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,no_a_zero_point)493 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, no_a_zero_point) { 494 TEST_REQUIRES_ARM_NEON; 495 for (size_t k = 1; k <= 40; k += 9) { 
496 GemmMicrokernelTester() 497 .mr(1) 498 .nr(8) 499 .kr(1) 500 .sr(1) 501 .m(1) 502 .n(8) 503 .k(k) 504 .a_zero_point(0) 505 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 506 } 507 } 508 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,no_b_zero_point)509 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, no_b_zero_point) { 510 TEST_REQUIRES_ARM_NEON; 511 for (size_t k = 1; k <= 40; k += 9) { 512 GemmMicrokernelTester() 513 .mr(1) 514 .nr(8) 515 .kr(1) 516 .sr(1) 517 .m(1) 518 .n(8) 519 .k(k) 520 .b_zero_point(0) 521 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 522 } 523 } 524 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,no_zero_point)525 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, no_zero_point) { 526 TEST_REQUIRES_ARM_NEON; 527 for (size_t k = 1; k <= 40; k += 9) { 528 GemmMicrokernelTester() 529 .mr(1) 530 .nr(8) 531 .kr(1) 532 .sr(1) 533 .m(1) 534 .n(8) 535 .k(k) 536 .a_zero_point(0) 537 .b_zero_point(0) 538 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 539 } 540 } 541 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY 542 543 544 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,k_eq_8)545 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) { 546 TEST_REQUIRES_ARM_NEON; 547 GemmMicrokernelTester() 548 .mr(4) 549 .nr(8) 550 .kr(1) 551 .sr(1) 552 .m(4) 553 .n(8) 554 .k(8) 555 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 556 } 557 
// Tests in this span pass
// xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53
// to GemmMicrokernelTester configured with mr = 4, nr = 8, kr = 1, sr = 1.

// m = 4, n = 8, k = 8 with cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// k = 8 for every n in [1, 8] and m in [1, 4]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// k = 8, n = 8 for every m in [1, 4]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 8, m = 4 for every n in [1, 8]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// m = 4, n = 8 for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Every k in [1, 7], n in [1, 8], m in [1, 4]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 8 for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Every k in [9, 15], n in [1, 8], m in [1, 4]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 8 for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Every k = 16, 24, ..., 80, n in [1, 8], m in [1, 4]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4 for every n in [9, 15] and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8 with cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Every n in [9, 15], k = 1, 10, ..., 37, m in [1, 4]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4 for n = 16, 24 and k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_strided_cn) { 800 TEST_REQUIRES_ARM_NEON; 801 for (uint32_t n = 16; n <= 24; n += 8) { 802 for (size_t k = 1; k <= 40; k += 9) { 803 GemmMicrokernelTester() 804 .mr(4) 805 .nr(8) 806 .kr(1) 807 .sr(1) 808 .m(4) 809 .n(n) 810 .k(k) 811 .cn_stride(11) 812 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 813 } 814 } 815 } 816 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,n_div_8_subtile)817 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_subtile) { 818 TEST_REQUIRES_ARM_NEON; 819 for (uint32_t n = 16; n <= 24; n += 8) { 820 for (size_t k = 1; k <= 40; k += 9) { 821 for (uint32_t m = 1; m <= 4; m++) { 822 GemmMicrokernelTester() 823 .mr(4) 824 .nr(8) 825 .kr(1) 826 .sr(1) 827 .m(m) 828 .n(n) 829 .k(k) 830 .iterations(1) 831 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 832 } 833 } 834 } 835 } 836 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,small_kernel)837 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, small_kernel) { 838 TEST_REQUIRES_ARM_NEON; 839 for (size_t k = 1; k <= 40; k += 9) { 840 GemmMicrokernelTester() 841 .mr(4) 842 .nr(8) 843 .kr(1) 844 .sr(1) 845 .m(4) 846 .n(8) 847 .k(k) 848 .ks(3) 849 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 850 } 851 } 852 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,small_kernel_subtile)853 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, small_kernel_subtile) { 854 TEST_REQUIRES_ARM_NEON; 855 for (size_t k = 1; k <= 40; k += 9) { 856 for (uint32_t n = 1; n <= 8; n++) { 857 for (uint32_t m = 1; m <= 4; m++) { 858 
GemmMicrokernelTester() 859 .mr(4) 860 .nr(8) 861 .kr(1) 862 .sr(1) 863 .m(m) 864 .n(n) 865 .k(k) 866 .ks(3) 867 .iterations(1) 868 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 869 } 870 } 871 } 872 } 873 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,n_gt_8_small_kernel)874 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_small_kernel) { 875 TEST_REQUIRES_ARM_NEON; 876 for (uint32_t n = 9; n < 16; n++) { 877 for (size_t k = 1; k <= 40; k += 9) { 878 GemmMicrokernelTester() 879 .mr(4) 880 .nr(8) 881 .kr(1) 882 .sr(1) 883 .m(4) 884 .n(n) 885 .k(k) 886 .ks(3) 887 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 888 } 889 } 890 } 891 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,n_div_8_small_kernel)892 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_small_kernel) { 893 TEST_REQUIRES_ARM_NEON; 894 for (uint32_t n = 16; n <= 24; n += 8) { 895 for (size_t k = 1; k <= 40; k += 9) { 896 GemmMicrokernelTester() 897 .mr(4) 898 .nr(8) 899 .kr(1) 900 .sr(1) 901 .m(4) 902 .n(n) 903 .k(k) 904 .ks(3) 905 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 906 } 907 } 908 } 909 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,strided_cm_subtile)910 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) { 911 TEST_REQUIRES_ARM_NEON; 912 for (size_t k = 1; k <= 40; k += 9) { 913 for (uint32_t n = 1; n <= 8; n++) { 914 for (uint32_t m = 1; m <= 4; m++) { 915 GemmMicrokernelTester() 916 .mr(4) 917 .nr(8) 918 .kr(1) 919 .sr(1) 920 .m(m) 921 .n(n) 922 .k(k) 923 .cm_stride(11) 924 .iterations(1) 925 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 926 } 927 } 928 } 929 } 930 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,a_offset)931 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, a_offset) { 932 TEST_REQUIRES_ARM_NEON; 933 for (size_t k = 1; k <= 40; k += 9) { 934 GemmMicrokernelTester() 935 .mr(4) 936 .nr(8) 937 .kr(1) 938 .sr(1) 939 .m(4) 940 .n(8) 941 .k(k) 942 .ks(3) 943 .a_offset(163) 944 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 945 } 946 } 947 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,zero)948 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, zero) { 949 TEST_REQUIRES_ARM_NEON; 950 for (size_t k = 1; k <= 40; k += 9) { 951 for (uint32_t mz = 0; mz < 4; mz++) { 952 GemmMicrokernelTester() 953 .mr(4) 954 .nr(8) 955 .kr(1) 956 .sr(1) 957 .m(4) 958 .n(8) 959 .k(k) 960 .ks(3) 961 .a_offset(163) 962 .zero_index(mz) 963 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 964 } 965 } 966 } 967 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,qmin)968 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, qmin) { 969 TEST_REQUIRES_ARM_NEON; 970 GemmMicrokernelTester() 971 .mr(4) 972 .nr(8) 973 .kr(1) 974 .sr(1) 975 .m(4) 976 .n(8) 977 .k(8) 978 .qmin(128) 979 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 980 } 981 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,qmax)982 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, qmax) { 983 TEST_REQUIRES_ARM_NEON; 984 GemmMicrokernelTester() 985 .mr(4) 986 
.nr(8) 987 .kr(1) 988 .sr(1) 989 .m(4) 990 .n(8) 991 .k(8) 992 .qmax(128) 993 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 994 } 995 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,strided_cm)996 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cm) { 997 TEST_REQUIRES_ARM_NEON; 998 GemmMicrokernelTester() 999 .mr(4) 1000 .nr(8) 1001 .kr(1) 1002 .sr(1) 1003 .m(4) 1004 .n(8) 1005 .k(8) 1006 .cm_stride(11) 1007 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1008 } 1009 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,no_a_zero_point)1010 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, no_a_zero_point) { 1011 TEST_REQUIRES_ARM_NEON; 1012 for (size_t k = 1; k <= 40; k += 9) { 1013 GemmMicrokernelTester() 1014 .mr(4) 1015 .nr(8) 1016 .kr(1) 1017 .sr(1) 1018 .m(4) 1019 .n(8) 1020 .k(k) 1021 .a_zero_point(0) 1022 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1023 } 1024 } 1025 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,no_b_zero_point)1026 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, no_b_zero_point) { 1027 TEST_REQUIRES_ARM_NEON; 1028 for (size_t k = 1; k <= 40; k += 9) { 1029 GemmMicrokernelTester() 1030 .mr(4) 1031 .nr(8) 1032 .kr(1) 1033 .sr(1) 1034 .m(4) 1035 .n(8) 1036 .k(k) 1037 .b_zero_point(0) 1038 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1039 } 1040 } 1041 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53,no_zero_point)1042 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, 
no_zero_point) { 1043 TEST_REQUIRES_ARM_NEON; 1044 for (size_t k = 1; k <= 40; k += 9) { 1045 GemmMicrokernelTester() 1046 .mr(4) 1047 .nr(8) 1048 .kr(1) 1049 .sr(1) 1050 .m(4) 1051 .n(8) 1052 .k(k) 1053 .a_zero_point(0) 1054 .b_zero_point(0) 1055 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1056 } 1057 } 1058 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY 1059 1060 1061 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,k_eq_8)1062 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) { 1063 TEST_REQUIRES_ARM_NEON; 1064 GemmMicrokernelTester() 1065 .mr(4) 1066 .nr(8) 1067 .kr(1) 1068 .sr(1) 1069 .m(4) 1070 .n(8) 1071 .k(8) 1072 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1073 } 1074 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,strided_cn)1075 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) { 1076 TEST_REQUIRES_ARM_NEON; 1077 GemmMicrokernelTester() 1078 .mr(4) 1079 .nr(8) 1080 .kr(1) 1081 .sr(1) 1082 .m(4) 1083 .n(8) 1084 .k(8) 1085 .cn_stride(11) 1086 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1087 } 1088 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,k_eq_8_subtile)1089 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) { 1090 TEST_REQUIRES_ARM_NEON; 1091 for (uint32_t n = 1; n <= 8; n++) { 1092 for (uint32_t m = 1; m <= 4; m++) { 1093 GemmMicrokernelTester() 1094 .mr(4) 1095 .nr(8) 1096 .kr(1) 1097 .sr(1) 1098 .m(m) 1099 .n(n) 1100 .k(8) 1101 .iterations(1) 1102 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1103 } 1104 } 1105 } 1106 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,k_eq_8_subtile_m)1107 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) { 1108 TEST_REQUIRES_ARM_NEON; 1109 for (uint32_t m = 1; m <= 4; m++) { 1110 GemmMicrokernelTester() 1111 .mr(4) 1112 .nr(8) 1113 .kr(1) 1114 .sr(1) 1115 .m(m) 1116 .n(8) 1117 .k(8) 1118 .iterations(1) 1119 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1120 } 1121 } 1122 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,k_eq_8_subtile_n)1123 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) { 1124 TEST_REQUIRES_ARM_NEON; 1125 for (uint32_t n = 1; n <= 8; n++) { 1126 GemmMicrokernelTester() 1127 .mr(4) 1128 .nr(8) 1129 .kr(1) 1130 .sr(1) 1131 .m(4) 1132 .n(n) 1133 .k(8) 1134 .iterations(1) 1135 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1136 } 1137 } 1138 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,k_lt_8)1139 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) { 1140 TEST_REQUIRES_ARM_NEON; 1141 for (size_t k = 1; k < 8; k++) { 1142 GemmMicrokernelTester() 1143 .mr(4) 1144 .nr(8) 1145 .kr(1) 1146 .sr(1) 1147 .m(4) 1148 .n(8) 1149 .k(k) 1150 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1151 } 1152 } 1153 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,k_lt_8_subtile)1154 
// k = 1..7 for every m in 1..4 and n in 1..8; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k = 9..15 with m = 4, n = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 9..15 for every m in 1..4 and n in 1..8; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k = 16, 24, ..., 80 with m = 4, n = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 for every m in 1..4 and n in 1..8; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n = 9..15 with m = 4 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n = 9..15, k in {1, 10, 19, 28, 37}, every m in 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n in {16, 24} with m = 4 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k in {1, 10, 19, 28, 37}, every m in 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 on the full 4x8 tile, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks = 3 for every m in 1..4 and n in 1..8, k in {1, 10, 19, 28, 37}; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 with n = 9..15, m = 4, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// ks = 3 with n in {16, 24}, m = 4, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// cm_stride = 11 for every m in 1..4 and n in 1..8, k in {1, 10, 19, 28, 37}; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,a_offset)1448 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) { 1449 TEST_REQUIRES_ARM_NEON; 1450 for (size_t k = 1; k <= 40; k += 9) { 1451 GemmMicrokernelTester() 1452 .mr(4) 1453 .nr(8) 1454 .kr(1) 1455 .sr(1) 1456 .m(4) 1457 .n(8) 1458 .k(k) 1459 .ks(3) 1460 .a_offset(163) 1461 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1462 } 1463 } 1464 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,zero)1465 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) { 1466 TEST_REQUIRES_ARM_NEON; 1467 for (size_t k = 1; k <= 40; k += 9) { 1468 for (uint32_t mz = 0; mz < 4; mz++) { 1469 GemmMicrokernelTester() 1470 .mr(4) 1471 .nr(8) 1472 .kr(1) 1473 .sr(1) 1474 .m(4) 1475 .n(8) 1476 .k(k) 1477 .ks(3) 1478 .a_offset(163) 1479 .zero_index(mz) 1480 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1481 } 1482 } 1483 } 1484 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,qmin)1485 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) { 1486 TEST_REQUIRES_ARM_NEON; 1487 GemmMicrokernelTester() 1488 .mr(4) 1489 .nr(8) 1490 .kr(1) 1491 .sr(1) 1492 .m(4) 1493 .n(8) 1494 .k(8) 1495 .qmin(128) 1496 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1497 } 1498 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,qmax)1499 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) { 1500 TEST_REQUIRES_ARM_NEON; 1501 GemmMicrokernelTester() 1502 .mr(4) 1503 .nr(8) 1504 .kr(1) 1505 .sr(1) 1506 .m(4) 1507 .n(8) 1508 .k(8) 1509 
.qmax(128) 1510 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1511 } 1512 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,strided_cm)1513 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) { 1514 TEST_REQUIRES_ARM_NEON; 1515 GemmMicrokernelTester() 1516 .mr(4) 1517 .nr(8) 1518 .kr(1) 1519 .sr(1) 1520 .m(4) 1521 .n(8) 1522 .k(8) 1523 .cm_stride(11) 1524 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1525 } 1526 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,no_a_zero_point)1527 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_a_zero_point) { 1528 TEST_REQUIRES_ARM_NEON; 1529 for (size_t k = 1; k <= 40; k += 9) { 1530 GemmMicrokernelTester() 1531 .mr(4) 1532 .nr(8) 1533 .kr(1) 1534 .sr(1) 1535 .m(4) 1536 .n(8) 1537 .k(k) 1538 .a_zero_point(0) 1539 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1540 } 1541 } 1542 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,no_b_zero_point)1543 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_b_zero_point) { 1544 TEST_REQUIRES_ARM_NEON; 1545 for (size_t k = 1; k <= 40; k += 9) { 1546 GemmMicrokernelTester() 1547 .mr(4) 1548 .nr(8) 1549 .kr(1) 1550 .sr(1) 1551 .m(4) 1552 .n(8) 1553 .k(k) 1554 .b_zero_point(0) 1555 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1556 } 1557 } 1558 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53,no_zero_point)1559 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, 
no_zero_point) { 1560 TEST_REQUIRES_ARM_NEON; 1561 for (size_t k = 1; k <= 40; k += 9) { 1562 GemmMicrokernelTester() 1563 .mr(4) 1564 .nr(8) 1565 .kr(1) 1566 .sr(1) 1567 .m(4) 1568 .n(8) 1569 .k(k) 1570 .a_zero_point(0) 1571 .b_zero_point(0) 1572 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1573 } 1574 } 1575 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY 1576 1577 1578 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_eq_8)1579 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) { 1580 TEST_REQUIRES_ARM_NEON; 1581 GemmMicrokernelTester() 1582 .mr(4) 1583 .nr(16) 1584 .kr(1) 1585 .sr(1) 1586 .m(4) 1587 .n(16) 1588 .k(8) 1589 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1590 } 1591 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,strided_cn)1592 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) { 1593 TEST_REQUIRES_ARM_NEON; 1594 GemmMicrokernelTester() 1595 .mr(4) 1596 .nr(16) 1597 .kr(1) 1598 .sr(1) 1599 .m(4) 1600 .n(16) 1601 .k(8) 1602 .cn_stride(19) 1603 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1604 } 1605 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_eq_8_subtile)1606 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) { 1607 TEST_REQUIRES_ARM_NEON; 1608 for (uint32_t n = 1; n <= 16; n++) { 1609 for (uint32_t m = 1; m <= 4; m++) { 1610 GemmMicrokernelTester() 1611 .mr(4) 1612 .nr(16) 1613 .kr(1) 1614 .sr(1) 1615 .m(m) 1616 .n(n) 1617 .k(8) 1618 .iterations(1) 1619 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1620 } 1621 } 1622 } 1623 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_eq_8_subtile_m)1624 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) { 1625 TEST_REQUIRES_ARM_NEON; 1626 for (uint32_t m = 1; m <= 4; m++) { 1627 GemmMicrokernelTester() 1628 .mr(4) 1629 .nr(16) 1630 .kr(1) 1631 .sr(1) 1632 .m(m) 1633 .n(16) 1634 .k(8) 1635 .iterations(1) 1636 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1637 } 1638 } 1639 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_eq_8_subtile_n)1640 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) { 1641 TEST_REQUIRES_ARM_NEON; 1642 for (uint32_t n = 1; n <= 16; n++) { 1643 GemmMicrokernelTester() 1644 .mr(4) 1645 .nr(16) 1646 .kr(1) 1647 .sr(1) 1648 .m(4) 1649 .n(n) 1650 .k(8) 1651 .iterations(1) 1652 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1653 } 1654 } 1655 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_lt_8)1656 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) { 1657 TEST_REQUIRES_ARM_NEON; 1658 for (size_t k = 1; k < 8; k++) { 1659 GemmMicrokernelTester() 1660 .mr(4) 1661 .nr(16) 1662 .kr(1) 1663 .sr(1) 1664 .m(4) 1665 .n(16) 1666 .k(k) 1667 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1668 } 1669 } 1670 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_lt_8_subtile)1671 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) { 1672 TEST_REQUIRES_ARM_NEON; 1673 for (size_t k = 1; k < 8; k++) { 1674 for (uint32_t n = 1; n 
<= 16; n++) { 1675 for (uint32_t m = 1; m <= 4; m++) { 1676 GemmMicrokernelTester() 1677 .mr(4) 1678 .nr(16) 1679 .kr(1) 1680 .sr(1) 1681 .m(m) 1682 .n(n) 1683 .k(k) 1684 .iterations(1) 1685 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1686 } 1687 } 1688 } 1689 } 1690 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_gt_8)1691 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) { 1692 TEST_REQUIRES_ARM_NEON; 1693 for (size_t k = 9; k < 16; k++) { 1694 GemmMicrokernelTester() 1695 .mr(4) 1696 .nr(16) 1697 .kr(1) 1698 .sr(1) 1699 .m(4) 1700 .n(16) 1701 .k(k) 1702 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1703 } 1704 } 1705 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_gt_8_subtile)1706 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) { 1707 TEST_REQUIRES_ARM_NEON; 1708 for (size_t k = 9; k < 16; k++) { 1709 for (uint32_t n = 1; n <= 16; n++) { 1710 for (uint32_t m = 1; m <= 4; m++) { 1711 GemmMicrokernelTester() 1712 .mr(4) 1713 .nr(16) 1714 .kr(1) 1715 .sr(1) 1716 .m(m) 1717 .n(n) 1718 .k(k) 1719 .iterations(1) 1720 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1721 } 1722 } 1723 } 1724 } 1725 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_div_8)1726 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) { 1727 TEST_REQUIRES_ARM_NEON; 1728 for (size_t k = 16; k <= 80; k += 8) { 1729 GemmMicrokernelTester() 1730 .mr(4) 1731 .nr(16) 1732 .kr(1) 1733 .sr(1) 1734 .m(4) 1735 .n(16) 1736 .k(k) 1737 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1738 } 1739 } 1740 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_div_8_subtile)1741 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) { 1742 TEST_REQUIRES_ARM_NEON; 1743 for (size_t k = 16; k <= 80; k += 8) { 1744 for (uint32_t n = 1; n <= 16; n++) { 1745 for (uint32_t m = 1; m <= 4; m++) { 1746 GemmMicrokernelTester() 1747 .mr(4) 1748 .nr(16) 1749 .kr(1) 1750 .sr(1) 1751 .m(m) 1752 .n(n) 1753 .k(k) 1754 .iterations(1) 1755 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1756 } 1757 } 1758 } 1759 } 1760 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,n_gt_16)1761 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) { 1762 TEST_REQUIRES_ARM_NEON; 1763 for (uint32_t n = 17; n < 32; n++) { 1764 for (size_t k = 1; k <= 40; k += 9) { 1765 GemmMicrokernelTester() 1766 .mr(4) 1767 .nr(16) 1768 .kr(1) 1769 .sr(1) 1770 .m(4) 1771 .n(n) 1772 .k(k) 1773 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1774 } 1775 } 1776 } 1777 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,n_gt_16_strided_cn)1778 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) { 1779 TEST_REQUIRES_ARM_NEON; 1780 for (uint32_t n = 17; n < 32; n++) { 1781 for (size_t k = 1; k <= 40; k += 9) { 1782 GemmMicrokernelTester() 1783 .mr(4) 1784 .nr(16) 1785 .kr(1) 1786 .sr(1) 1787 .m(4) 1788 .n(n) 1789 .k(k) 1790 .cn_stride(19) 1791 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1792 } 1793 } 1794 } 1795 
// n = 17..31, k in {1, 10, 19, 28, 37}, every m in 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n in {32, 48} with m = 4 and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
// n in {32, 48}, k in {1, 10, 19, 28, 37}, every m in 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 on the full 4x16 tile, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks = 3 for every m in 1..4 and n in 1..16, k in {1, 10, 19, 28, 37}; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 with n = 17..31, m = 4, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// ks = 3 with n in {32, 48}, m = 4, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// cm_stride = 19 for every m in 1..4 and n in 1..16, k in {1, 10, 19, 28, 37}; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 and a_offset = 163 on the full 4x16 tile, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same setup as a_offset, additionally sweeping zero_index over 0..3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Full 4x16 tile, k = 8, with qmin set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// Full 4x16 tile, k = 8, with qmax set to 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) { 2031 TEST_REQUIRES_ARM_NEON; 2032 GemmMicrokernelTester() 2033 .mr(4) 2034 .nr(16) 2035 .kr(1) 2036 .sr(1) 2037 .m(4) 2038 .n(16) 2039 .k(8) 2040 .cm_stride(19) 2041 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2042 } 2043 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,no_a_zero_point)2044 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_a_zero_point) { 2045 TEST_REQUIRES_ARM_NEON; 2046 for (size_t k = 1; k <= 40; k += 9) { 2047 GemmMicrokernelTester() 2048 .mr(4) 2049 .nr(16) 2050 .kr(1) 2051 .sr(1) 2052 .m(4) 2053 .n(16) 2054 .k(k) 2055 .a_zero_point(0) 2056 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2057 } 2058 } 2059 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,no_b_zero_point)2060 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_b_zero_point) { 2061 TEST_REQUIRES_ARM_NEON; 2062 for (size_t k = 1; k <= 40; k += 9) { 2063 GemmMicrokernelTester() 2064 .mr(4) 2065 .nr(16) 2066 .kr(1) 2067 .sr(1) 2068 .m(4) 2069 .n(16) 2070 .k(k) 2071 .b_zero_point(0) 2072 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2073 } 2074 } 2075 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,no_zero_point)2076 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_zero_point) { 2077 TEST_REQUIRES_ARM_NEON; 2078 for (size_t k = 1; k <= 40; k += 9) { 2079 GemmMicrokernelTester() 2080 .mr(4) 2081 .nr(16) 2082 .kr(1) 2083 .sr(1) 2084 .m(4) 2085 .n(16) 2086 .k(k) 2087 .a_zero_point(0) 2088 .b_zero_point(0) 2089 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2090 } 2091 } 2092 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 2093 2094 2095 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,k_eq_8)2096 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8) { 2097 TEST_REQUIRES_ARM_NEON; 2098 GemmMicrokernelTester() 2099 .mr(4) 2100 .nr(16) 2101 .kr(1) 2102 .sr(1) 2103 .m(4) 2104 .n(16) 2105 .k(8) 2106 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2107 } 2108 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,strided_cn)2109 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cn) { 2110 TEST_REQUIRES_ARM_NEON; 2111 GemmMicrokernelTester() 2112 .mr(4) 2113 .nr(16) 2114 .kr(1) 2115 .sr(1) 2116 .m(4) 2117 .n(16) 2118 .k(8) 2119 .cn_stride(19) 2120 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2121 } 2122 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,k_eq_8_subtile)2123 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile) { 2124 TEST_REQUIRES_ARM_NEON; 2125 for (uint32_t n = 1; n <= 16; n++) { 2126 for (uint32_t m = 1; m <= 4; m++) { 2127 GemmMicrokernelTester() 2128 .mr(4) 2129 .nr(16) 2130 .kr(1) 2131 .sr(1) 2132 .m(m) 2133 .n(n) 2134 .k(8) 2135 .iterations(1) 2136 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2137 } 2138 } 2139 } 2140 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,k_eq_8_subtile_m)2141 
// m in 1..4 at n=16, k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// n in 1..16 at m=4, k=8, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// k in 1..7 at m=4, n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// k in 1..7 crossed with n in 1..16 and m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k in 9..15 at m=4, n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// k in 9..15 crossed with n in 1..16 and m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k = 16, 24, ..., 80 at m=4, n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 crossed with n in 1..16 and m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n in 17..31 crossed with k = 1, 10, 19, 28, 37 at m=4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// n in 17..31, k = 1, 10, 19, 28, 37, m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n = 32, 48 crossed with k = 1, 10, 19, 28, 37 at m=4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_div_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// n = 32, 48, k = 1, 10, 19, 28, 37, m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k = 1, 10, 19, 28, 37 at m=4, n=16 with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// ks(3) with k = 1, 10, 19, 28, 37, n in 1..16, m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n in 17..31 and k = 1, 10, 19, 28, 37 at m=4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// ks(3) with n = 32, 48 and k = 1, 10, 19, 28, 37 at m=4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// cm_stride(19) with k = 1, 10, 19, 28, 37, n in 1..16, m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k = 1, 10, 19, 28, 37 at m=4, n=16 with ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// Same setup as a_offset, additionally sweeping zero_index over 0..3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_dim)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// Single run at m=4, n=16, k=8 with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}

// Single run at m=4, n=16, k=8 with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}

TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cm) { 2548 TEST_REQUIRES_ARM_NEON; 2549 GemmMicrokernelTester() 2550 .mr(4) 2551 .nr(16) 2552 .kr(1) 2553 .sr(1) 2554 .m(4) 2555 .n(16) 2556 .k(8) 2557 .cm_stride(19) 2558 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2559 } 2560 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,no_a_zero_point)2561 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_a_zero_point) { 2562 TEST_REQUIRES_ARM_NEON; 2563 for (size_t k = 1; k <= 40; k += 9) { 2564 GemmMicrokernelTester() 2565 .mr(4) 2566 .nr(16) 2567 .kr(1) 2568 .sr(1) 2569 .m(4) 2570 .n(16) 2571 .k(k) 2572 .a_zero_point(0) 2573 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2574 } 2575 } 2576 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,no_b_zero_point)2577 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_b_zero_point) { 2578 TEST_REQUIRES_ARM_NEON; 2579 for (size_t k = 1; k <= 40; k += 9) { 2580 GemmMicrokernelTester() 2581 .mr(4) 2582 .nr(16) 2583 .kr(1) 2584 .sr(1) 2585 .m(4) 2586 .n(16) 2587 .k(k) 2588 .b_zero_point(0) 2589 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2590 } 2591 } 2592 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75,no_zero_point)2593 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_zero_point) { 2594 TEST_REQUIRES_ARM_NEON; 2595 for (size_t k = 1; k <= 40; k += 9) { 2596 GemmMicrokernelTester() 2597 .mr(4) 2598 .nr(16) 2599 .kr(1) 2600 .sr(1) 2601 .m(4) 2602 .n(16) 2603 .k(k) 2604 .a_zero_point(0) 2605 .b_zero_point(0) 2606 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2607 } 2608 } 2609 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 2610 2611 2612 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_16)2613 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) { 2614 TEST_REQUIRES_ARM_NEON_DOT; 2615 GemmMicrokernelTester() 2616 .mr(4) 2617 .nr(16) 2618 .kr(4) 2619 .sr(1) 2620 .m(4) 2621 .n(16) 2622 .k(16) 2623 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2624 } 2625 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,strided_cn)2626 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) { 2627 TEST_REQUIRES_ARM_NEON_DOT; 2628 GemmMicrokernelTester() 2629 .mr(4) 2630 .nr(16) 2631 .kr(4) 2632 .sr(1) 2633 .m(4) 2634 .n(16) 2635 .k(16) 2636 .cn_stride(19) 2637 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2638 } 2639 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_16_subtile)2640 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) { 2641 TEST_REQUIRES_ARM_NEON_DOT; 2642 for (uint32_t n = 1; n <= 16; n++) { 2643 for (uint32_t m = 1; m <= 4; m++) { 2644 GemmMicrokernelTester() 2645 .mr(4) 2646 .nr(16) 2647 .kr(4) 2648 .sr(1) 2649 .m(m) 2650 .n(n) 2651 .k(16) 2652 .iterations(1) 2653 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2654 } 2655 } 2656 } 2657 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_16_subtile_m)2658 
// m in 1..4 at n=16, k=16, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(m_dim).n(16).k(16)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// n in 1..16 at m=4, k=16, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(n_dim).k(16)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// k in 1..15 at m=4, n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// k in 1..15 crossed with n in 1..16 and m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k in 17..31 at m=4, n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// k in 17..31 crossed with n in 1..16 and m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k = 32, 48, ..., 160 at m=4, n=16.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// k = 32, 48, ..., 160 crossed with n in 1..16 and m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n in 17..31 crossed with k = 1, 18, 35, 52, 69 at m=4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// n in 17..31, k = 1, 18, 35, 52, 69, m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n = 32, 48 crossed with k = 1, 18, 35, 52, 69 at m=4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_div_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// n = 32, 48, k = 1, 18, 35, 52, 69, m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k = 1, 18, 35, 52, 69 at m=4, n=16 with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// ks(3) with k = 1, 18, 35, 52, 69, n in 1..16, m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n in 17..31 and k = 1, 18, 35, 52, 69 at m=4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// ks(3) with n = 32, 48 and k = 1, 18, 35, 52, 69 at m=4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// cm_stride(19) with k = 1, 18, 35, 52, 69, n in 1..16, m in 1..4, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
                xnn_init_qu8_conv_minmax_rndnu_neon_params,
                xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// k = 1, 18, 35, 52, 69 at m=4, n=16 with ks(3) and a_offset(331).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_dim)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qu8_conv_minmax_rndnu_neon_params,
            xnn_qu8_requantize_rndnu);
  }
}

// Same setup as a_offset, additionally sweeping zero_index over 0..3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k_dim)
        .ks(3)
        .a_offset(331)
        .zero_index(zero_idx)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
    }
  }
}

// Single run at m=4, n=16, k=16 with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}

// Single run at m=4, n=16, k=16 with qmax(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}

// Single run at m=4, n=16, k=16 with cm_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(16)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qu8_conv_minmax_rndnu_neon_params,
          xnn_qu8_requantize_rndnu);
}

TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_a_zero_point) { 3079 TEST_REQUIRES_ARM_NEON_DOT; 3080 for (size_t k = 1; k <= 80; k += 17) { 3081 GemmMicrokernelTester() 3082 .mr(4) 3083 .nr(16) 3084 .kr(4) 3085 .sr(1) 3086 .m(4) 3087 .n(16) 3088 .k(k) 3089 .a_zero_point(0) 3090 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3091 } 3092 } 3093 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,no_b_zero_point)3094 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_b_zero_point) { 3095 TEST_REQUIRES_ARM_NEON_DOT; 3096 for (size_t k = 1; k <= 80; k += 17) { 3097 GemmMicrokernelTester() 3098 .mr(4) 3099 .nr(16) 3100 .kr(4) 3101 .sr(1) 3102 .m(4) 3103 .n(16) 3104 .k(k) 3105 .b_zero_point(0) 3106 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3107 } 3108 } 3109 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55,no_zero_point)3110 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, no_zero_point) { 3111 TEST_REQUIRES_ARM_NEON_DOT; 3112 for (size_t k = 1; k <= 80; k += 17) { 3113 GemmMicrokernelTester() 3114 .mr(4) 3115 .nr(16) 3116 .kr(4) 3117 .sr(1) 3118 .m(4) 3119 .n(16) 3120 .k(k) 3121 .a_zero_point(0) 3122 .b_zero_point(0) 3123 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3124 } 3125 } 3126 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 3127 3128 3129 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16)3130 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) { 3131 TEST_REQUIRES_ARM_NEON_DOT; 3132 GemmMicrokernelTester() 3133 .mr(4) 3134 .nr(16) 3135 .kr(4) 3136 .sr(1) 3137 
.m(4) 3138 .n(16) 3139 .k(16) 3140 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3141 } 3142 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,strided_cn)3143 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) { 3144 TEST_REQUIRES_ARM_NEON_DOT; 3145 GemmMicrokernelTester() 3146 .mr(4) 3147 .nr(16) 3148 .kr(4) 3149 .sr(1) 3150 .m(4) 3151 .n(16) 3152 .k(16) 3153 .cn_stride(19) 3154 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3155 } 3156 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile)3157 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) { 3158 TEST_REQUIRES_ARM_NEON_DOT; 3159 for (uint32_t n = 1; n <= 16; n++) { 3160 for (uint32_t m = 1; m <= 4; m++) { 3161 GemmMicrokernelTester() 3162 .mr(4) 3163 .nr(16) 3164 .kr(4) 3165 .sr(1) 3166 .m(m) 3167 .n(n) 3168 .k(16) 3169 .iterations(1) 3170 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3171 } 3172 } 3173 } 3174 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile_m)3175 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) { 3176 TEST_REQUIRES_ARM_NEON_DOT; 3177 for (uint32_t m = 1; m <= 4; m++) { 3178 GemmMicrokernelTester() 3179 .mr(4) 3180 .nr(16) 3181 .kr(4) 3182 .sr(1) 3183 .m(m) 3184 .n(16) 3185 .k(16) 3186 .iterations(1) 3187 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3188 } 3189 } 3190 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile_n)3191 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) { 3192 TEST_REQUIRES_ARM_NEON_DOT; 3193 for (uint32_t n = 1; n 
<= 16; n++) { 3194 GemmMicrokernelTester() 3195 .mr(4) 3196 .nr(16) 3197 .kr(4) 3198 .sr(1) 3199 .m(4) 3200 .n(n) 3201 .k(16) 3202 .iterations(1) 3203 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3204 } 3205 } 3206 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_lt_16)3207 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) { 3208 TEST_REQUIRES_ARM_NEON_DOT; 3209 for (size_t k = 1; k < 16; k++) { 3210 GemmMicrokernelTester() 3211 .mr(4) 3212 .nr(16) 3213 .kr(4) 3214 .sr(1) 3215 .m(4) 3216 .n(16) 3217 .k(k) 3218 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3219 } 3220 } 3221 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_lt_16_subtile)3222 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) { 3223 TEST_REQUIRES_ARM_NEON_DOT; 3224 for (size_t k = 1; k < 16; k++) { 3225 for (uint32_t n = 1; n <= 16; n++) { 3226 for (uint32_t m = 1; m <= 4; m++) { 3227 GemmMicrokernelTester() 3228 .mr(4) 3229 .nr(16) 3230 .kr(4) 3231 .sr(1) 3232 .m(m) 3233 .n(n) 3234 .k(k) 3235 .iterations(1) 3236 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3237 } 3238 } 3239 } 3240 } 3241 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_gt_16)3242 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16) { 3243 TEST_REQUIRES_ARM_NEON_DOT; 3244 for (size_t k = 17; k < 32; k++) { 3245 GemmMicrokernelTester() 3246 .mr(4) 3247 .nr(16) 3248 .kr(4) 3249 .sr(1) 3250 .m(4) 3251 .n(16) 3252 .k(k) 3253 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3254 } 3255 } 3256 
// k swept over 17..31, every (m, n) with m in 1..4 and n in 1..16; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 16, k = 32, 48, ..., 160 (step 16).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 32, 48, ..., 160, every (m, n) with m in 1..4 and n in 1..16; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n swept over 17..31, k = 1, 18, 35, 52, 69.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n swept over 17..31, k = 1, 18, 35, 52, 69, m swept over 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 32 and 48, k = 1, 18, 35, 52, 69.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1, 18, 35, 52, 69, m swept over 1..4; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 16, k = 1, 18, 35, 52, 69, with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks(3), k = 1, 18, 35, 52, 69, every (m, n) with m in 1..4 and n in 1..16; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n swept over 17..31, k = 1, 18, 35, 52, 69, with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m = 4, n = 32 and 48, k = 1, 18, 35, 52, 69, with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// cm_stride(19), k = 1, 18, 35, 52, 69, every (m, n) with m in 1..4 and n in 1..16; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 16, k = 1, 18, 35, 52, 69, with ks(3) and a_offset(331).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same configuration as a_offset, additionally sweeping zero_index(mz) for mz = 0..3.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(331)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m = 4, n = 16, k = 16, with qmin(128).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, qmax) {
  // m = 4, n = 16, k = 16, with qmax(128).
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// m = 4, n = 16, k = 16, with cm_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .cm_stride(19)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// m = 4, n = 16, k = 1, 18, 35, 52, 69, with a_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, no_a_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// m = 4, n = 16, k = 1, 18, 35, 52, 69, with b_zero_point(0).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, no_b_zero_point) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .b_zero_point(0)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,no_zero_point)3627 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, no_zero_point) { 3628 TEST_REQUIRES_ARM_NEON_DOT; 3629 for (size_t k = 1; k <= 80; k += 17) { 3630 GemmMicrokernelTester() 3631 .mr(4) 3632 .nr(16) 3633 .kr(4) 3634 .sr(1) 3635 .m(4) 3636 .n(16) 3637 .k(k) 3638 .a_zero_point(0) 3639 .b_zero_point(0) 3640 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3641 } 3642 } 3643 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 3644 3645 3646 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,k_eq_8)3647 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8) { 3648 TEST_REQUIRES_ARM_NEON; 3649 GemmMicrokernelTester() 3650 .mr(1) 3651 .nr(8) 3652 .kr(1) 3653 .sr(1) 3654 .m(1) 3655 .n(8) 3656 .k(8) 3657 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3658 } 3659 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,strided_cn)3660 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cn) { 3661 TEST_REQUIRES_ARM_NEON; 3662 GemmMicrokernelTester() 3663 .mr(1) 3664 .nr(8) 3665 .kr(1) 3666 .sr(1) 3667 .m(1) 3668 .n(8) 3669 .k(8) 3670 .cn_stride(11) 3671 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3672 } 3673 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,k_eq_8_subtile)3674 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile) { 3675 TEST_REQUIRES_ARM_NEON; 3676 for (uint32_t n = 1; n <= 8; n++) { 3677 for (uint32_t m = 1; m <= 1; m++) { 3678 GemmMicrokernelTester() 3679 .mr(1) 3680 .nr(8) 3681 .kr(1) 3682 .sr(1) 3683 .m(m) 3684 .n(n) 3685 .k(8) 3686 .iterations(1) 3687 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3688 } 3689 } 3690 } 3691 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,k_eq_8_subtile_m)3692 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) { 3693 TEST_REQUIRES_ARM_NEON; 3694 for (uint32_t m = 1; m <= 1; m++) { 3695 GemmMicrokernelTester() 3696 .mr(1) 3697 .nr(8) 3698 .kr(1) 3699 .sr(1) 3700 .m(m) 3701 .n(8) 3702 .k(8) 3703 .iterations(1) 3704 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3705 } 3706 } 3707 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,k_eq_8_subtile_n)3708 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) { 3709 TEST_REQUIRES_ARM_NEON; 3710 for (uint32_t n = 1; n <= 8; n++) { 3711 GemmMicrokernelTester() 3712 .mr(1) 3713 .nr(8) 3714 .kr(1) 3715 .sr(1) 3716 .m(1) 3717 .n(n) 3718 .k(8) 3719 .iterations(1) 3720 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3721 } 3722 } 3723 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,k_lt_8)3724 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8) { 3725 TEST_REQUIRES_ARM_NEON; 3726 for (size_t k = 1; k < 8; k++) { 3727 GemmMicrokernelTester() 3728 .mr(1) 3729 .nr(8) 3730 .kr(1) 3731 .sr(1) 3732 .m(1) 3733 .n(8) 3734 .k(k) 3735 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3736 } 3737 } 3738 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,k_lt_8_subtile)3739 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8_subtile) { 3740 TEST_REQUIRES_ARM_NEON; 3741 for (size_t k = 1; k < 8; k++) { 3742 for (uint32_t n = 1; n <= 8; n++) { 3743 for (uint32_t m = 1; m <= 1; m++) { 3744 GemmMicrokernelTester() 3745 .mr(1) 3746 .nr(8) 3747 .kr(1) 3748 .sr(1) 3749 .m(m) 3750 .n(n) 3751 .k(k) 3752 .iterations(1) 3753 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3754 } 3755 } 3756 } 3757 } 3758 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,k_gt_8)3759 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8) { 3760 TEST_REQUIRES_ARM_NEON; 3761 for (size_t k = 9; k < 16; k++) { 3762 GemmMicrokernelTester() 3763 .mr(1) 3764 .nr(8) 3765 .kr(1) 3766 .sr(1) 3767 .m(1) 3768 .n(8) 3769 .k(k) 3770 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3771 } 3772 } 3773 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,k_gt_8_subtile)3774 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8_subtile) { 3775 TEST_REQUIRES_ARM_NEON; 3776 for (size_t k = 9; k < 16; k++) { 3777 for (uint32_t n = 1; n <= 8; n++) { 3778 for (uint32_t m = 1; m <= 1; m++) { 3779 GemmMicrokernelTester() 3780 .mr(1) 3781 .nr(8) 3782 .kr(1) 3783 .sr(1) 3784 .m(m) 3785 .n(n) 3786 .k(k) 3787 .iterations(1) 3788 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3789 } 3790 } 3791 } 3792 } 3793 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,k_div_8)3794 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8) { 3795 TEST_REQUIRES_ARM_NEON; 3796 for (size_t k = 16; k <= 80; k += 8) { 3797 GemmMicrokernelTester() 3798 .mr(1) 3799 .nr(8) 3800 .kr(1) 3801 .sr(1) 3802 .m(1) 3803 .n(8) 3804 .k(k) 3805 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3806 } 3807 } 3808 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,k_div_8_subtile)3809 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8_subtile) { 3810 TEST_REQUIRES_ARM_NEON; 3811 for (size_t k = 16; k <= 80; k += 8) { 3812 for (uint32_t n = 1; n <= 8; n++) { 3813 for (uint32_t m = 1; m <= 1; m++) { 3814 GemmMicrokernelTester() 3815 .mr(1) 
3816 .nr(8) 3817 .kr(1) 3818 .sr(1) 3819 .m(m) 3820 .n(n) 3821 .k(k) 3822 .iterations(1) 3823 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3824 } 3825 } 3826 } 3827 } 3828 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,n_gt_8)3829 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8) { 3830 TEST_REQUIRES_ARM_NEON; 3831 for (uint32_t n = 9; n < 16; n++) { 3832 for (size_t k = 1; k <= 40; k += 9) { 3833 GemmMicrokernelTester() 3834 .mr(1) 3835 .nr(8) 3836 .kr(1) 3837 .sr(1) 3838 .m(1) 3839 .n(n) 3840 .k(k) 3841 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3842 } 3843 } 3844 } 3845 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,n_gt_8_strided_cn)3846 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) { 3847 TEST_REQUIRES_ARM_NEON; 3848 for (uint32_t n = 9; n < 16; n++) { 3849 for (size_t k = 1; k <= 40; k += 9) { 3850 GemmMicrokernelTester() 3851 .mr(1) 3852 .nr(8) 3853 .kr(1) 3854 .sr(1) 3855 .m(1) 3856 .n(n) 3857 .k(k) 3858 .cn_stride(11) 3859 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3860 } 3861 } 3862 } 3863 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,n_gt_8_subtile)3864 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_subtile) { 3865 TEST_REQUIRES_ARM_NEON; 3866 for (uint32_t n = 9; n < 16; n++) { 3867 for (size_t k = 1; k <= 40; k += 9) { 3868 for (uint32_t m = 1; m <= 1; m++) { 3869 GemmMicrokernelTester() 3870 .mr(1) 3871 .nr(8) 3872 .kr(1) 3873 .sr(1) 3874 .m(m) 3875 .n(n) 3876 .k(k) 3877 .iterations(1) 3878 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3879 } 3880 } 3881 } 3882 } 3883 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,n_div_8)3884 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8) { 3885 TEST_REQUIRES_ARM_NEON; 3886 for (uint32_t n = 16; n <= 24; n += 8) { 3887 for (size_t k = 1; k <= 40; k += 9) { 3888 GemmMicrokernelTester() 3889 .mr(1) 3890 .nr(8) 3891 .kr(1) 3892 .sr(1) 3893 .m(1) 3894 .n(n) 3895 .k(k) 3896 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3897 } 3898 } 3899 } 3900 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,n_div_8_strided_cn)3901 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) { 3902 TEST_REQUIRES_ARM_NEON; 3903 for (uint32_t n = 16; n <= 24; n += 8) { 3904 for (size_t k = 1; k <= 40; k += 9) { 3905 GemmMicrokernelTester() 3906 .mr(1) 3907 .nr(8) 3908 .kr(1) 3909 .sr(1) 3910 .m(1) 3911 .n(n) 3912 .k(k) 3913 .cn_stride(11) 3914 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3915 } 3916 } 3917 } 3918 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,n_div_8_subtile)3919 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_subtile) { 3920 TEST_REQUIRES_ARM_NEON; 3921 for (uint32_t n = 16; n <= 24; n += 8) { 3922 for (size_t k = 1; k <= 40; k += 9) { 3923 for (uint32_t m = 1; m <= 1; m++) { 3924 GemmMicrokernelTester() 3925 .mr(1) 3926 .nr(8) 3927 .kr(1) 3928 .sr(1) 3929 .m(m) 3930 .n(n) 3931 .k(k) 3932 .iterations(1) 3933 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3934 } 3935 } 3936 } 3937 } 3938 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,small_kernel)3939 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, small_kernel) { 3940 TEST_REQUIRES_ARM_NEON; 3941 for (size_t k = 1; k <= 40; k += 9) { 3942 GemmMicrokernelTester() 3943 .mr(1) 3944 .nr(8) 3945 .kr(1) 3946 .sr(1) 3947 .m(1) 3948 .n(8) 3949 .k(k) 3950 .ks(3) 3951 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3952 } 3953 } 3954 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,small_kernel_subtile)3955 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, small_kernel_subtile) { 3956 TEST_REQUIRES_ARM_NEON; 3957 for (size_t k = 1; k <= 40; k += 9) { 3958 for (uint32_t n = 1; n <= 8; n++) { 3959 for (uint32_t m = 1; m <= 1; m++) { 3960 GemmMicrokernelTester() 3961 .mr(1) 3962 .nr(8) 3963 .kr(1) 3964 .sr(1) 3965 .m(m) 3966 .n(n) 3967 .k(k) 3968 .ks(3) 3969 .iterations(1) 3970 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3971 } 3972 } 3973 } 3974 } 3975 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,n_gt_8_small_kernel)3976 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_small_kernel) { 3977 TEST_REQUIRES_ARM_NEON; 3978 for (uint32_t n = 9; n < 16; n++) { 3979 for (size_t k = 1; k <= 40; k += 9) { 3980 GemmMicrokernelTester() 3981 .mr(1) 3982 .nr(8) 3983 .kr(1) 3984 .sr(1) 3985 .m(1) 3986 .n(n) 3987 .k(k) 3988 .ks(3) 3989 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3990 } 3991 } 3992 } 3993 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,n_div_8_small_kernel)3994 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_small_kernel) { 3995 TEST_REQUIRES_ARM_NEON; 3996 for (uint32_t n = 16; n <= 24; n += 8) { 3997 for (size_t k = 1; k <= 40; k += 9) { 3998 GemmMicrokernelTester() 3999 .mr(1) 4000 .nr(8) 4001 .kr(1) 4002 .sr(1) 4003 .m(1) 4004 .n(n) 4005 .k(k) 4006 .ks(3) 4007 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4008 } 4009 } 4010 } 4011 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,strided_cm_subtile)4012 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm_subtile) { 4013 TEST_REQUIRES_ARM_NEON; 4014 for (size_t k = 1; k <= 
40; k += 9) { 4015 for (uint32_t n = 1; n <= 8; n++) { 4016 for (uint32_t m = 1; m <= 1; m++) { 4017 GemmMicrokernelTester() 4018 .mr(1) 4019 .nr(8) 4020 .kr(1) 4021 .sr(1) 4022 .m(m) 4023 .n(n) 4024 .k(k) 4025 .cm_stride(11) 4026 .iterations(1) 4027 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4028 } 4029 } 4030 } 4031 } 4032 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,a_offset)4033 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, a_offset) { 4034 TEST_REQUIRES_ARM_NEON; 4035 for (size_t k = 1; k <= 40; k += 9) { 4036 GemmMicrokernelTester() 4037 .mr(1) 4038 .nr(8) 4039 .kr(1) 4040 .sr(1) 4041 .m(1) 4042 .n(8) 4043 .k(k) 4044 .ks(3) 4045 .a_offset(43) 4046 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4047 } 4048 } 4049 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,zero)4050 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, zero) { 4051 TEST_REQUIRES_ARM_NEON; 4052 for (size_t k = 1; k <= 40; k += 9) { 4053 for (uint32_t mz = 0; mz < 1; mz++) { 4054 GemmMicrokernelTester() 4055 .mr(1) 4056 .nr(8) 4057 .kr(1) 4058 .sr(1) 4059 .m(1) 4060 .n(8) 4061 .k(k) 4062 .ks(3) 4063 .a_offset(43) 4064 .zero_index(mz) 4065 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4066 } 4067 } 4068 } 4069 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,qmin)4070 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmin) { 4071 TEST_REQUIRES_ARM_NEON; 4072 GemmMicrokernelTester() 4073 .mr(1) 4074 .nr(8) 4075 .kr(1) 4076 .sr(1) 4077 .m(1) 4078 .n(8) 4079 .k(8) 4080 .qmin(128) 4081 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4082 } 4083 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,qmax)4084 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmax) 
{ 4085 TEST_REQUIRES_ARM_NEON; 4086 GemmMicrokernelTester() 4087 .mr(1) 4088 .nr(8) 4089 .kr(1) 4090 .sr(1) 4091 .m(1) 4092 .n(8) 4093 .k(8) 4094 .qmax(128) 4095 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4096 } 4097 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,strided_cm)4098 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm) { 4099 TEST_REQUIRES_ARM_NEON; 4100 GemmMicrokernelTester() 4101 .mr(1) 4102 .nr(8) 4103 .kr(1) 4104 .sr(1) 4105 .m(1) 4106 .n(8) 4107 .k(8) 4108 .cm_stride(11) 4109 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4110 } 4111 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,no_a_zero_point)4112 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_a_zero_point) { 4113 TEST_REQUIRES_ARM_NEON; 4114 for (size_t k = 1; k <= 40; k += 9) { 4115 GemmMicrokernelTester() 4116 .mr(1) 4117 .nr(8) 4118 .kr(1) 4119 .sr(1) 4120 .m(1) 4121 .n(8) 4122 .k(k) 4123 .a_zero_point(0) 4124 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4125 } 4126 } 4127 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,no_b_zero_point)4128 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_b_zero_point) { 4129 TEST_REQUIRES_ARM_NEON; 4130 for (size_t k = 1; k <= 40; k += 9) { 4131 GemmMicrokernelTester() 4132 .mr(1) 4133 .nr(8) 4134 .kr(1) 4135 .sr(1) 4136 .m(1) 4137 .n(8) 4138 .k(k) 4139 .b_zero_point(0) 4140 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4141 } 4142 } 4143 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE,no_zero_point)4144 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_zero_point) { 4145 TEST_REQUIRES_ARM_NEON; 4146 for (size_t k = 1; k <= 40; k += 9) { 4147 GemmMicrokernelTester() 4148 .mr(1) 4149 .nr(8) 
4150 .kr(1) 4151 .sr(1) 4152 .m(1) 4153 .n(8) 4154 .k(k) 4155 .a_zero_point(0) 4156 .b_zero_point(0) 4157 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4158 } 4159 } 4160 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 4161 4162 4163 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,k_eq_8)4164 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8) { 4165 TEST_REQUIRES_ARM_NEON_DOT; 4166 GemmMicrokernelTester() 4167 .mr(1) 4168 .nr(8) 4169 .kr(4) 4170 .sr(1) 4171 .m(1) 4172 .n(8) 4173 .k(8) 4174 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4175 } 4176 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,strided_cn)4177 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cn) { 4178 TEST_REQUIRES_ARM_NEON_DOT; 4179 GemmMicrokernelTester() 4180 .mr(1) 4181 .nr(8) 4182 .kr(4) 4183 .sr(1) 4184 .m(1) 4185 .n(8) 4186 .k(8) 4187 .cn_stride(11) 4188 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4189 } 4190 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,k_eq_8_subtile)4191 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile) { 4192 TEST_REQUIRES_ARM_NEON_DOT; 4193 for (uint32_t n = 1; n <= 8; n++) { 4194 for (uint32_t m = 1; m <= 1; m++) { 4195 GemmMicrokernelTester() 4196 .mr(1) 4197 .nr(8) 4198 .kr(4) 4199 .sr(1) 4200 .m(m) 4201 .n(n) 4202 .k(8) 4203 .iterations(1) 4204 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4205 } 4206 } 4207 } 4208 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,k_eq_8_subtile_m)4209 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_m) { 4210 TEST_REQUIRES_ARM_NEON_DOT; 4211 for (uint32_t m = 1; m <= 1; m++) { 4212 GemmMicrokernelTester() 4213 .mr(1) 4214 
.nr(8) 4215 .kr(4) 4216 .sr(1) 4217 .m(m) 4218 .n(8) 4219 .k(8) 4220 .iterations(1) 4221 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4222 } 4223 } 4224 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,k_eq_8_subtile_n)4225 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_n) { 4226 TEST_REQUIRES_ARM_NEON_DOT; 4227 for (uint32_t n = 1; n <= 8; n++) { 4228 GemmMicrokernelTester() 4229 .mr(1) 4230 .nr(8) 4231 .kr(4) 4232 .sr(1) 4233 .m(1) 4234 .n(n) 4235 .k(8) 4236 .iterations(1) 4237 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4238 } 4239 } 4240 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,k_lt_8)4241 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8) { 4242 TEST_REQUIRES_ARM_NEON_DOT; 4243 for (size_t k = 1; k < 8; k++) { 4244 GemmMicrokernelTester() 4245 .mr(1) 4246 .nr(8) 4247 .kr(4) 4248 .sr(1) 4249 .m(1) 4250 .n(8) 4251 .k(k) 4252 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4253 } 4254 } 4255 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,k_lt_8_subtile)4256 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8_subtile) { 4257 TEST_REQUIRES_ARM_NEON_DOT; 4258 for (size_t k = 1; k < 8; k++) { 4259 for (uint32_t n = 1; n <= 8; n++) { 4260 for (uint32_t m = 1; m <= 1; m++) { 4261 GemmMicrokernelTester() 4262 .mr(1) 4263 .nr(8) 4264 .kr(4) 4265 .sr(1) 4266 .m(m) 4267 .n(n) 4268 .k(k) 4269 .iterations(1) 4270 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4271 } 4272 } 4273 } 4274 } 4275 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,k_gt_8)4276 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8) { 4277 TEST_REQUIRES_ARM_NEON_DOT; 4278 for (size_t k = 9; k < 16; k++) { 4279 GemmMicrokernelTester() 4280 .mr(1) 4281 .nr(8) 4282 .kr(4) 4283 .sr(1) 4284 
.m(1) 4285 .n(8) 4286 .k(k) 4287 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4288 } 4289 } 4290 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,k_gt_8_subtile)4291 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8_subtile) { 4292 TEST_REQUIRES_ARM_NEON_DOT; 4293 for (size_t k = 9; k < 16; k++) { 4294 for (uint32_t n = 1; n <= 8; n++) { 4295 for (uint32_t m = 1; m <= 1; m++) { 4296 GemmMicrokernelTester() 4297 .mr(1) 4298 .nr(8) 4299 .kr(4) 4300 .sr(1) 4301 .m(m) 4302 .n(n) 4303 .k(k) 4304 .iterations(1) 4305 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4306 } 4307 } 4308 } 4309 } 4310 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,k_div_8)4311 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8) { 4312 TEST_REQUIRES_ARM_NEON_DOT; 4313 for (size_t k = 16; k <= 80; k += 8) { 4314 GemmMicrokernelTester() 4315 .mr(1) 4316 .nr(8) 4317 .kr(4) 4318 .sr(1) 4319 .m(1) 4320 .n(8) 4321 .k(k) 4322 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4323 } 4324 } 4325 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,k_div_8_subtile)4326 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8_subtile) { 4327 TEST_REQUIRES_ARM_NEON_DOT; 4328 for (size_t k = 16; k <= 80; k += 8) { 4329 for (uint32_t n = 1; n <= 8; n++) { 4330 for (uint32_t m = 1; m <= 1; m++) { 4331 GemmMicrokernelTester() 4332 .mr(1) 4333 .nr(8) 4334 .kr(4) 4335 .sr(1) 4336 .m(m) 4337 .n(n) 4338 .k(k) 4339 .iterations(1) 4340 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4341 } 4342 } 4343 } 4344 } 4345 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,n_gt_8)4346 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8) { 4347 TEST_REQUIRES_ARM_NEON_DOT; 4348 for (uint32_t n = 9; n < 16; n++) { 4349 for (size_t k = 1; k <= 
40; k += 9) { 4350 GemmMicrokernelTester() 4351 .mr(1) 4352 .nr(8) 4353 .kr(4) 4354 .sr(1) 4355 .m(1) 4356 .n(n) 4357 .k(k) 4358 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4359 } 4360 } 4361 } 4362 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,n_gt_8_strided_cn)4363 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_strided_cn) { 4364 TEST_REQUIRES_ARM_NEON_DOT; 4365 for (uint32_t n = 9; n < 16; n++) { 4366 for (size_t k = 1; k <= 40; k += 9) { 4367 GemmMicrokernelTester() 4368 .mr(1) 4369 .nr(8) 4370 .kr(4) 4371 .sr(1) 4372 .m(1) 4373 .n(n) 4374 .k(k) 4375 .cn_stride(11) 4376 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4377 } 4378 } 4379 } 4380 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,n_gt_8_subtile)4381 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_subtile) { 4382 TEST_REQUIRES_ARM_NEON_DOT; 4383 for (uint32_t n = 9; n < 16; n++) { 4384 for (size_t k = 1; k <= 40; k += 9) { 4385 for (uint32_t m = 1; m <= 1; m++) { 4386 GemmMicrokernelTester() 4387 .mr(1) 4388 .nr(8) 4389 .kr(4) 4390 .sr(1) 4391 .m(m) 4392 .n(n) 4393 .k(k) 4394 .iterations(1) 4395 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4396 } 4397 } 4398 } 4399 } 4400 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,n_div_8)4401 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8) { 4402 TEST_REQUIRES_ARM_NEON_DOT; 4403 for (uint32_t n = 16; n <= 24; n += 8) { 4404 for (size_t k = 1; k <= 40; k += 9) { 4405 GemmMicrokernelTester() 4406 .mr(1) 4407 .nr(8) 4408 .kr(4) 4409 .sr(1) 4410 .m(1) 4411 .n(n) 4412 .k(k) 4413 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4414 } 4415 } 4416 } 4417 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,n_div_8_strided_cn)4418 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_strided_cn) { 4419 TEST_REQUIRES_ARM_NEON_DOT; 4420 for (uint32_t n = 16; n <= 24; n += 8) { 4421 for (size_t k = 1; k <= 40; k += 9) { 4422 GemmMicrokernelTester() 4423 .mr(1) 4424 .nr(8) 4425 .kr(4) 4426 .sr(1) 4427 .m(1) 4428 .n(n) 4429 .k(k) 4430 .cn_stride(11) 4431 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4432 } 4433 } 4434 } 4435 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,n_div_8_subtile)4436 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_subtile) { 4437 TEST_REQUIRES_ARM_NEON_DOT; 4438 for (uint32_t n = 16; n <= 24; n += 8) { 4439 for (size_t k = 1; k <= 40; k += 9) { 4440 for (uint32_t m = 1; m <= 1; m++) { 4441 GemmMicrokernelTester() 4442 .mr(1) 4443 .nr(8) 4444 .kr(4) 4445 .sr(1) 4446 .m(m) 4447 .n(n) 4448 .k(k) 4449 .iterations(1) 4450 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4451 } 4452 } 4453 } 4454 } 4455 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,small_kernel)4456 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel) { 4457 TEST_REQUIRES_ARM_NEON_DOT; 4458 for (size_t k = 1; k <= 40; k += 9) { 4459 GemmMicrokernelTester() 4460 .mr(1) 4461 .nr(8) 4462 .kr(4) 4463 .sr(1) 4464 .m(1) 4465 .n(8) 4466 .k(k) 4467 .ks(3) 4468 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4469 } 4470 } 4471 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,small_kernel_subtile)4472 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel_subtile) { 4473 TEST_REQUIRES_ARM_NEON_DOT; 4474 for (size_t k = 1; k <= 40; k += 9) { 4475 for (uint32_t n = 1; n <= 8; n++) { 4476 for (uint32_t m = 1; m <= 1; m++) { 4477 GemmMicrokernelTester() 4478 .mr(1) 4479 .nr(8) 4480 .kr(4) 4481 .sr(1) 4482 .m(m) 4483 .n(n) 4484 .k(k) 4485 .ks(3) 4486 .iterations(1) 4487 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4488 } 4489 } 4490 } 4491 } 4492 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,n_gt_8_small_kernel)4493 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_small_kernel) { 4494 TEST_REQUIRES_ARM_NEON_DOT; 4495 for (uint32_t n = 9; n < 16; n++) { 4496 for (size_t k = 1; k <= 40; k += 9) { 4497 GemmMicrokernelTester() 4498 .mr(1) 4499 .nr(8) 4500 .kr(4) 4501 .sr(1) 4502 .m(1) 4503 .n(n) 4504 .k(k) 4505 .ks(3) 4506 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4507 } 4508 } 4509 } 4510 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,n_div_8_small_kernel)4511 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_small_kernel) { 4512 TEST_REQUIRES_ARM_NEON_DOT; 4513 for (uint32_t n = 16; n <= 24; n += 8) { 4514 for (size_t k = 1; k <= 40; k += 9) { 4515 GemmMicrokernelTester() 4516 .mr(1) 4517 .nr(8) 4518 .kr(4) 4519 .sr(1) 4520 .m(1) 4521 .n(n) 4522 .k(k) 4523 .ks(3) 4524 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4525 } 4526 } 4527 } 4528 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,strided_cm_subtile)4529 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm_subtile) { 4530 TEST_REQUIRES_ARM_NEON_DOT; 4531 for (size_t k = 1; k <= 40; k += 9) { 4532 for (uint32_t n = 1; n <= 8; n++) { 4533 for (uint32_t m = 1; m <= 1; m++) { 4534 GemmMicrokernelTester() 4535 .mr(1) 4536 .nr(8) 4537 .kr(4) 4538 .sr(1) 4539 .m(m) 4540 .n(n) 4541 .k(k) 4542 .cm_stride(11) 4543 .iterations(1) 4544 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4545 } 4546 } 4547 } 4548 } 4549 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,a_offset)4550 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, a_offset) { 4551 TEST_REQUIRES_ARM_NEON_DOT; 4552 for 
(size_t k = 1; k <= 40; k += 9) { 4553 GemmMicrokernelTester() 4554 .mr(1) 4555 .nr(8) 4556 .kr(4) 4557 .sr(1) 4558 .m(1) 4559 .n(8) 4560 .k(k) 4561 .ks(3) 4562 .a_offset(43) 4563 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4564 } 4565 } 4566 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,zero)4567 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, zero) { 4568 TEST_REQUIRES_ARM_NEON_DOT; 4569 for (size_t k = 1; k <= 40; k += 9) { 4570 for (uint32_t mz = 0; mz < 1; mz++) { 4571 GemmMicrokernelTester() 4572 .mr(1) 4573 .nr(8) 4574 .kr(4) 4575 .sr(1) 4576 .m(1) 4577 .n(8) 4578 .k(k) 4579 .ks(3) 4580 .a_offset(43) 4581 .zero_index(mz) 4582 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4583 } 4584 } 4585 } 4586 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,qmin)4587 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmin) { 4588 TEST_REQUIRES_ARM_NEON_DOT; 4589 GemmMicrokernelTester() 4590 .mr(1) 4591 .nr(8) 4592 .kr(4) 4593 .sr(1) 4594 .m(1) 4595 .n(8) 4596 .k(8) 4597 .qmin(128) 4598 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4599 } 4600 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,qmax)4601 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmax) { 4602 TEST_REQUIRES_ARM_NEON_DOT; 4603 GemmMicrokernelTester() 4604 .mr(1) 4605 .nr(8) 4606 .kr(4) 4607 .sr(1) 4608 .m(1) 4609 .n(8) 4610 .k(8) 4611 .qmax(128) 4612 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4613 } 4614 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,strided_cm)4615 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm) { 4616 TEST_REQUIRES_ARM_NEON_DOT; 4617 GemmMicrokernelTester() 4618 .mr(1) 4619 .nr(8) 4620 .kr(4) 4621 .sr(1) 4622 .m(1) 4623 .n(8) 4624 .k(8) 4625 .cm_stride(11) 4626 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4627 } 4628 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,no_a_zero_point)4629 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_a_zero_point) { 4630 TEST_REQUIRES_ARM_NEON_DOT; 4631 for (size_t k = 1; k <= 40; k += 9) { 4632 GemmMicrokernelTester() 4633 .mr(1) 4634 .nr(8) 4635 .kr(4) 4636 .sr(1) 4637 .m(1) 4638 .n(8) 4639 .k(k) 4640 .a_zero_point(0) 4641 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4642 } 4643 } 4644 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,no_b_zero_point)4645 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_b_zero_point) { 4646 TEST_REQUIRES_ARM_NEON_DOT; 4647 for (size_t k = 1; k <= 40; k += 9) { 4648 GemmMicrokernelTester() 4649 .mr(1) 4650 .nr(8) 4651 .kr(4) 4652 .sr(1) 4653 .m(1) 4654 .n(8) 4655 .k(k) 4656 .b_zero_point(0) 4657 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4658 } 4659 } 4660 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT,no_zero_point)4661 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_zero_point) { 4662 TEST_REQUIRES_ARM_NEON_DOT; 4663 for (size_t k = 1; k <= 40; k += 9) { 4664 GemmMicrokernelTester() 4665 .mr(1) 4666 .nr(8) 4667 .kr(4) 4668 .sr(1) 4669 .m(1) 4670 .n(8) 4671 .k(k) 4672 .a_zero_point(0) 4673 .b_zero_point(0) 4674 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4675 } 4676 } 4677 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 4678 4679 4680 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,k_eq_8)4681 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8) { 4682 TEST_REQUIRES_ARM_NEON; 4683 GemmMicrokernelTester() 4684 .mr(1) 4685 .nr(16) 4686 .kr(1) 4687 .sr(1) 4688 .m(1) 
4689 .n(16) 4690 .k(8) 4691 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4692 } 4693 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,strided_cn)4694 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cn) { 4695 TEST_REQUIRES_ARM_NEON; 4696 GemmMicrokernelTester() 4697 .mr(1) 4698 .nr(16) 4699 .kr(1) 4700 .sr(1) 4701 .m(1) 4702 .n(16) 4703 .k(8) 4704 .cn_stride(19) 4705 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4706 } 4707 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,k_eq_8_subtile)4708 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile) { 4709 TEST_REQUIRES_ARM_NEON; 4710 for (uint32_t n = 1; n <= 16; n++) { 4711 for (uint32_t m = 1; m <= 1; m++) { 4712 GemmMicrokernelTester() 4713 .mr(1) 4714 .nr(16) 4715 .kr(1) 4716 .sr(1) 4717 .m(m) 4718 .n(n) 4719 .k(8) 4720 .iterations(1) 4721 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4722 } 4723 } 4724 } 4725 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,k_eq_8_subtile_m)4726 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 4727 TEST_REQUIRES_ARM_NEON; 4728 for (uint32_t m = 1; m <= 1; m++) { 4729 GemmMicrokernelTester() 4730 .mr(1) 4731 .nr(16) 4732 .kr(1) 4733 .sr(1) 4734 .m(m) 4735 .n(16) 4736 .k(8) 4737 .iterations(1) 4738 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4739 } 4740 } 4741 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,k_eq_8_subtile_n)4742 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 4743 TEST_REQUIRES_ARM_NEON; 4744 for (uint32_t n = 1; n <= 16; n++) { 4745 GemmMicrokernelTester() 4746 .mr(1) 4747 .nr(16) 4748 .kr(1) 4749 .sr(1) 4750 .m(1) 4751 .n(n) 4752 .k(8) 4753 
.iterations(1) 4754 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4755 } 4756 } 4757 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,k_lt_8)4758 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8) { 4759 TEST_REQUIRES_ARM_NEON; 4760 for (size_t k = 1; k < 8; k++) { 4761 GemmMicrokernelTester() 4762 .mr(1) 4763 .nr(16) 4764 .kr(1) 4765 .sr(1) 4766 .m(1) 4767 .n(16) 4768 .k(k) 4769 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4770 } 4771 } 4772 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,k_lt_8_subtile)4773 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8_subtile) { 4774 TEST_REQUIRES_ARM_NEON; 4775 for (size_t k = 1; k < 8; k++) { 4776 for (uint32_t n = 1; n <= 16; n++) { 4777 for (uint32_t m = 1; m <= 1; m++) { 4778 GemmMicrokernelTester() 4779 .mr(1) 4780 .nr(16) 4781 .kr(1) 4782 .sr(1) 4783 .m(m) 4784 .n(n) 4785 .k(k) 4786 .iterations(1) 4787 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4788 } 4789 } 4790 } 4791 } 4792 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,k_gt_8)4793 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8) { 4794 TEST_REQUIRES_ARM_NEON; 4795 for (size_t k = 9; k < 16; k++) { 4796 GemmMicrokernelTester() 4797 .mr(1) 4798 .nr(16) 4799 .kr(1) 4800 .sr(1) 4801 .m(1) 4802 .n(16) 4803 .k(k) 4804 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4805 } 4806 } 4807 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,k_gt_8_subtile)4808 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8_subtile) { 4809 TEST_REQUIRES_ARM_NEON; 4810 for (size_t k = 9; k < 16; k++) { 4811 for (uint32_t n = 1; n <= 16; n++) { 4812 for (uint32_t m = 1; m <= 1; m++) { 4813 GemmMicrokernelTester() 
4814 .mr(1) 4815 .nr(16) 4816 .kr(1) 4817 .sr(1) 4818 .m(m) 4819 .n(n) 4820 .k(k) 4821 .iterations(1) 4822 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4823 } 4824 } 4825 } 4826 } 4827 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,k_div_8)4828 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8) { 4829 TEST_REQUIRES_ARM_NEON; 4830 for (size_t k = 16; k <= 80; k += 8) { 4831 GemmMicrokernelTester() 4832 .mr(1) 4833 .nr(16) 4834 .kr(1) 4835 .sr(1) 4836 .m(1) 4837 .n(16) 4838 .k(k) 4839 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4840 } 4841 } 4842 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,k_div_8_subtile)4843 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8_subtile) { 4844 TEST_REQUIRES_ARM_NEON; 4845 for (size_t k = 16; k <= 80; k += 8) { 4846 for (uint32_t n = 1; n <= 16; n++) { 4847 for (uint32_t m = 1; m <= 1; m++) { 4848 GemmMicrokernelTester() 4849 .mr(1) 4850 .nr(16) 4851 .kr(1) 4852 .sr(1) 4853 .m(m) 4854 .n(n) 4855 .k(k) 4856 .iterations(1) 4857 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4858 } 4859 } 4860 } 4861 } 4862 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,n_gt_16)4863 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16) { 4864 TEST_REQUIRES_ARM_NEON; 4865 for (uint32_t n = 17; n < 32; n++) { 4866 for (size_t k = 1; k <= 40; k += 9) { 4867 GemmMicrokernelTester() 4868 .mr(1) 4869 .nr(16) 4870 .kr(1) 4871 .sr(1) 4872 .m(1) 4873 .n(n) 4874 .k(k) 4875 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4876 } 4877 } 4878 } 4879 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,n_gt_16_strided_cn)4880 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) { 
4881 TEST_REQUIRES_ARM_NEON; 4882 for (uint32_t n = 17; n < 32; n++) { 4883 for (size_t k = 1; k <= 40; k += 9) { 4884 GemmMicrokernelTester() 4885 .mr(1) 4886 .nr(16) 4887 .kr(1) 4888 .sr(1) 4889 .m(1) 4890 .n(n) 4891 .k(k) 4892 .cn_stride(19) 4893 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4894 } 4895 } 4896 } 4897 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,n_gt_16_subtile)4898 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_subtile) { 4899 TEST_REQUIRES_ARM_NEON; 4900 for (uint32_t n = 17; n < 32; n++) { 4901 for (size_t k = 1; k <= 40; k += 9) { 4902 for (uint32_t m = 1; m <= 1; m++) { 4903 GemmMicrokernelTester() 4904 .mr(1) 4905 .nr(16) 4906 .kr(1) 4907 .sr(1) 4908 .m(m) 4909 .n(n) 4910 .k(k) 4911 .iterations(1) 4912 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4913 } 4914 } 4915 } 4916 } 4917 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,n_div_16)4918 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16) { 4919 TEST_REQUIRES_ARM_NEON; 4920 for (uint32_t n = 32; n <= 48; n += 16) { 4921 for (size_t k = 1; k <= 40; k += 9) { 4922 GemmMicrokernelTester() 4923 .mr(1) 4924 .nr(16) 4925 .kr(1) 4926 .sr(1) 4927 .m(1) 4928 .n(n) 4929 .k(k) 4930 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4931 } 4932 } 4933 } 4934 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,n_div_16_strided_cn)4935 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) { 4936 TEST_REQUIRES_ARM_NEON; 4937 for (uint32_t n = 32; n <= 48; n += 16) { 4938 for (size_t k = 1; k <= 40; k += 9) { 4939 GemmMicrokernelTester() 4940 .mr(1) 4941 .nr(16) 4942 .kr(1) 4943 .sr(1) 4944 .m(1) 4945 .n(n) 4946 .k(k) 4947 .cn_stride(19) 4948 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4949 } 4950 } 4951 } 4952 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,n_div_16_subtile)4953 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_subtile) { 4954 TEST_REQUIRES_ARM_NEON; 4955 for (uint32_t n = 32; n <= 48; n += 16) { 4956 for (size_t k = 1; k <= 40; k += 9) { 4957 for (uint32_t m = 1; m <= 1; m++) { 4958 GemmMicrokernelTester() 4959 .mr(1) 4960 .nr(16) 4961 .kr(1) 4962 .sr(1) 4963 .m(m) 4964 .n(n) 4965 .k(k) 4966 .iterations(1) 4967 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4968 } 4969 } 4970 } 4971 } 4972 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,small_kernel)4973 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, small_kernel) { 4974 TEST_REQUIRES_ARM_NEON; 4975 for (size_t k = 1; k <= 40; k += 9) { 4976 GemmMicrokernelTester() 4977 .mr(1) 4978 .nr(16) 4979 .kr(1) 4980 .sr(1) 4981 .m(1) 4982 .n(16) 4983 .k(k) 4984 .ks(3) 4985 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4986 } 4987 } 4988 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,small_kernel_subtile)4989 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, small_kernel_subtile) { 4990 TEST_REQUIRES_ARM_NEON; 4991 for (size_t k = 1; k <= 40; k += 9) { 4992 for (uint32_t n = 1; n <= 16; n++) { 4993 for (uint32_t m = 1; m <= 1; m++) { 4994 GemmMicrokernelTester() 4995 .mr(1) 4996 .nr(16) 4997 .kr(1) 4998 .sr(1) 4999 .m(m) 5000 .n(n) 5001 .k(k) 5002 .ks(3) 5003 .iterations(1) 5004 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5005 } 5006 } 5007 } 5008 } 5009 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,n_gt_16_small_kernel)5010 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) { 5011 TEST_REQUIRES_ARM_NEON; 5012 for 
(uint32_t n = 17; n < 32; n++) { 5013 for (size_t k = 1; k <= 40; k += 9) { 5014 GemmMicrokernelTester() 5015 .mr(1) 5016 .nr(16) 5017 .kr(1) 5018 .sr(1) 5019 .m(1) 5020 .n(n) 5021 .k(k) 5022 .ks(3) 5023 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5024 } 5025 } 5026 } 5027 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,n_div_16_small_kernel)5028 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) { 5029 TEST_REQUIRES_ARM_NEON; 5030 for (uint32_t n = 32; n <= 48; n += 16) { 5031 for (size_t k = 1; k <= 40; k += 9) { 5032 GemmMicrokernelTester() 5033 .mr(1) 5034 .nr(16) 5035 .kr(1) 5036 .sr(1) 5037 .m(1) 5038 .n(n) 5039 .k(k) 5040 .ks(3) 5041 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5042 } 5043 } 5044 } 5045 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,strided_cm_subtile)5046 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm_subtile) { 5047 TEST_REQUIRES_ARM_NEON; 5048 for (size_t k = 1; k <= 40; k += 9) { 5049 for (uint32_t n = 1; n <= 16; n++) { 5050 for (uint32_t m = 1; m <= 1; m++) { 5051 GemmMicrokernelTester() 5052 .mr(1) 5053 .nr(16) 5054 .kr(1) 5055 .sr(1) 5056 .m(m) 5057 .n(n) 5058 .k(k) 5059 .cm_stride(19) 5060 .iterations(1) 5061 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5062 } 5063 } 5064 } 5065 } 5066 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,a_offset)5067 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, a_offset) { 5068 TEST_REQUIRES_ARM_NEON; 5069 for (size_t k = 1; k <= 40; k += 9) { 5070 GemmMicrokernelTester() 5071 .mr(1) 5072 .nr(16) 5073 .kr(1) 5074 .sr(1) 5075 .m(1) 5076 .n(16) 5077 .k(k) 5078 .ks(3) 5079 .a_offset(43) 5080 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 5081 } 5082 } 5083 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,zero)5084 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, zero) { 5085 TEST_REQUIRES_ARM_NEON; 5086 for (size_t k = 1; k <= 40; k += 9) { 5087 for (uint32_t mz = 0; mz < 1; mz++) { 5088 GemmMicrokernelTester() 5089 .mr(1) 5090 .nr(16) 5091 .kr(1) 5092 .sr(1) 5093 .m(1) 5094 .n(16) 5095 .k(k) 5096 .ks(3) 5097 .a_offset(43) 5098 .zero_index(mz) 5099 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5100 } 5101 } 5102 } 5103 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,qmin)5104 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmin) { 5105 TEST_REQUIRES_ARM_NEON; 5106 GemmMicrokernelTester() 5107 .mr(1) 5108 .nr(16) 5109 .kr(1) 5110 .sr(1) 5111 .m(1) 5112 .n(16) 5113 .k(8) 5114 .qmin(128) 5115 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5116 } 5117 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,qmax)5118 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmax) { 5119 TEST_REQUIRES_ARM_NEON; 5120 GemmMicrokernelTester() 5121 .mr(1) 5122 .nr(16) 5123 .kr(1) 5124 .sr(1) 5125 .m(1) 5126 .n(16) 5127 .k(8) 5128 .qmax(128) 5129 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5130 } 5131 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,strided_cm)5132 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm) { 5133 TEST_REQUIRES_ARM_NEON; 5134 GemmMicrokernelTester() 5135 .mr(1) 5136 .nr(16) 5137 .kr(1) 5138 .sr(1) 5139 .m(1) 5140 .n(16) 5141 .k(8) 5142 .cm_stride(19) 5143 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5144 } 5145 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,no_a_zero_point)5146 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, no_a_zero_point) { 5147 TEST_REQUIRES_ARM_NEON; 5148 for (size_t k = 1; k <= 40; k += 9) { 5149 GemmMicrokernelTester() 5150 .mr(1) 5151 .nr(16) 5152 .kr(1) 5153 .sr(1) 5154 .m(1) 5155 .n(16) 5156 .k(k) 5157 .a_zero_point(0) 5158 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5159 } 5160 } 5161 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,no_b_zero_point)5162 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, no_b_zero_point) { 5163 TEST_REQUIRES_ARM_NEON; 5164 for (size_t k = 1; k <= 40; k += 9) { 5165 GemmMicrokernelTester() 5166 .mr(1) 5167 .nr(16) 5168 .kr(1) 5169 .sr(1) 5170 .m(1) 5171 .n(16) 5172 .k(k) 5173 .b_zero_point(0) 5174 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5175 } 5176 } 5177 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE,no_zero_point)5178 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, no_zero_point) { 5179 TEST_REQUIRES_ARM_NEON; 5180 for (size_t k = 1; k <= 40; k += 9) { 5181 GemmMicrokernelTester() 5182 .mr(1) 5183 .nr(16) 5184 .kr(1) 5185 .sr(1) 5186 .m(1) 5187 .n(16) 5188 .k(k) 5189 .a_zero_point(0) 5190 .b_zero_point(0) 5191 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5192 } 5193 } 5194 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5195 5196 5197 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8)5198 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8) { 5199 TEST_REQUIRES_ARM_NEON; 5200 GemmMicrokernelTester() 5201 .mr(2) 5202 .nr(16) 5203 .kr(1) 5204 .sr(1) 5205 .m(2) 5206 .n(16) 5207 .k(8) 5208 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5209 } 5210 
// 2x16 tile with k == 8 and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// k == 8, every (m, n) with m in [1, 2] and n in [1, 16]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// k == 8, n == 16, m swept over [1, 2]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k == 8, m == 2, n swept over [1, 16]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
// 2x16 tile with k swept over [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k in [1, 7], every (m, n) with m in [1, 2] and n in [1, 16]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 2x16 tile with k swept over [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k in [9, 15], every (m, n) with m in [1, 2] and n in [1, 16]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 2x16 tile with k swept over the multiples of 8 in [16, 80].
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k in {16, 24, ..., 80}, every (m, n) with m in [1, 2] and n in [1, 16]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m == 2, n in [17, 31], k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m == 2, n in [17, 31], k in {1, 10, 19, 28, 37}, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in [17, 31], k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m == 2, n in {32, 48}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m == 2, n in {32, 48}, k in {1, 10, 19, 28, 37}, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_subtile) { 5471 TEST_REQUIRES_ARM_NEON; 5472 for (uint32_t n = 32; n <= 48; n += 16) { 5473 for (size_t k = 1; k <= 40; k += 9) { 5474 for (uint32_t m = 1; m <= 2; m++) { 5475 GemmMicrokernelTester() 5476 .mr(2) 5477 .nr(16) 5478 .kr(1) 5479 .sr(1) 5480 .m(m) 5481 .n(n) 5482 .k(k) 5483 .iterations(1) 5484 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5485 } 5486 } 5487 } 5488 } 5489 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,small_kernel)5490 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, small_kernel) { 5491 TEST_REQUIRES_ARM_NEON; 5492 for (size_t k = 1; k <= 40; k += 9) { 5493 GemmMicrokernelTester() 5494 .mr(2) 5495 .nr(16) 5496 .kr(1) 5497 .sr(1) 5498 .m(2) 5499 .n(16) 5500 .k(k) 5501 .ks(3) 5502 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5503 } 5504 } 5505 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,small_kernel_subtile)5506 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, small_kernel_subtile) { 5507 TEST_REQUIRES_ARM_NEON; 5508 for (size_t k = 1; k <= 40; k += 9) { 5509 for (uint32_t n = 1; n <= 16; n++) { 5510 for (uint32_t m = 1; m <= 2; m++) { 5511 GemmMicrokernelTester() 5512 .mr(2) 5513 .nr(16) 5514 .kr(1) 5515 .sr(1) 5516 .m(m) 5517 .n(n) 5518 .k(k) 5519 .ks(3) 5520 .iterations(1) 5521 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5522 } 5523 } 5524 } 5525 } 5526 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16_small_kernel)5527 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_small_kernel) { 5528 TEST_REQUIRES_ARM_NEON; 5529 for (uint32_t n = 17; n < 32; n++) { 5530 for (size_t k = 1; k <= 40; k += 9) { 5531 GemmMicrokernelTester() 5532 .mr(2) 5533 .nr(16) 5534 .kr(1) 5535 .sr(1) 5536 .m(2) 5537 
.n(n) 5538 .k(k) 5539 .ks(3) 5540 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5541 } 5542 } 5543 } 5544 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16_small_kernel)5545 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_small_kernel) { 5546 TEST_REQUIRES_ARM_NEON; 5547 for (uint32_t n = 32; n <= 48; n += 16) { 5548 for (size_t k = 1; k <= 40; k += 9) { 5549 GemmMicrokernelTester() 5550 .mr(2) 5551 .nr(16) 5552 .kr(1) 5553 .sr(1) 5554 .m(2) 5555 .n(n) 5556 .k(k) 5557 .ks(3) 5558 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5559 } 5560 } 5561 } 5562 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cm_subtile)5563 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm_subtile) { 5564 TEST_REQUIRES_ARM_NEON; 5565 for (size_t k = 1; k <= 40; k += 9) { 5566 for (uint32_t n = 1; n <= 16; n++) { 5567 for (uint32_t m = 1; m <= 2; m++) { 5568 GemmMicrokernelTester() 5569 .mr(2) 5570 .nr(16) 5571 .kr(1) 5572 .sr(1) 5573 .m(m) 5574 .n(n) 5575 .k(k) 5576 .cm_stride(19) 5577 .iterations(1) 5578 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5579 } 5580 } 5581 } 5582 } 5583 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,a_offset)5584 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, a_offset) { 5585 TEST_REQUIRES_ARM_NEON; 5586 for (size_t k = 1; k <= 40; k += 9) { 5587 GemmMicrokernelTester() 5588 .mr(2) 5589 .nr(16) 5590 .kr(1) 5591 .sr(1) 5592 .m(2) 5593 .n(16) 5594 .k(k) 5595 .ks(3) 5596 .a_offset(83) 5597 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5598 } 5599 } 5600 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,zero)5601 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, zero) { 5602 
TEST_REQUIRES_ARM_NEON; 5603 for (size_t k = 1; k <= 40; k += 9) { 5604 for (uint32_t mz = 0; mz < 2; mz++) { 5605 GemmMicrokernelTester() 5606 .mr(2) 5607 .nr(16) 5608 .kr(1) 5609 .sr(1) 5610 .m(2) 5611 .n(16) 5612 .k(k) 5613 .ks(3) 5614 .a_offset(83) 5615 .zero_index(mz) 5616 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5617 } 5618 } 5619 } 5620 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,qmin)5621 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmin) { 5622 TEST_REQUIRES_ARM_NEON; 5623 GemmMicrokernelTester() 5624 .mr(2) 5625 .nr(16) 5626 .kr(1) 5627 .sr(1) 5628 .m(2) 5629 .n(16) 5630 .k(8) 5631 .qmin(128) 5632 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5633 } 5634 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,qmax)5635 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmax) { 5636 TEST_REQUIRES_ARM_NEON; 5637 GemmMicrokernelTester() 5638 .mr(2) 5639 .nr(16) 5640 .kr(1) 5641 .sr(1) 5642 .m(2) 5643 .n(16) 5644 .k(8) 5645 .qmax(128) 5646 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5647 } 5648 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cm)5649 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm) { 5650 TEST_REQUIRES_ARM_NEON; 5651 GemmMicrokernelTester() 5652 .mr(2) 5653 .nr(16) 5654 .kr(1) 5655 .sr(1) 5656 .m(2) 5657 .n(16) 5658 .k(8) 5659 .cm_stride(19) 5660 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5661 } 5662 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,no_a_zero_point)5663 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, no_a_zero_point) { 5664 TEST_REQUIRES_ARM_NEON; 5665 for (size_t k = 1; k <= 40; k += 9) { 5666 GemmMicrokernelTester() 5667 .mr(2) 5668 .nr(16) 5669 
.kr(1) 5670 .sr(1) 5671 .m(2) 5672 .n(16) 5673 .k(k) 5674 .a_zero_point(0) 5675 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5676 } 5677 } 5678 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,no_b_zero_point)5679 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, no_b_zero_point) { 5680 TEST_REQUIRES_ARM_NEON; 5681 for (size_t k = 1; k <= 40; k += 9) { 5682 GemmMicrokernelTester() 5683 .mr(2) 5684 .nr(16) 5685 .kr(1) 5686 .sr(1) 5687 .m(2) 5688 .n(16) 5689 .k(k) 5690 .b_zero_point(0) 5691 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5692 } 5693 } 5694 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,no_zero_point)5695 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, no_zero_point) { 5696 TEST_REQUIRES_ARM_NEON; 5697 for (size_t k = 1; k <= 40; k += 9) { 5698 GemmMicrokernelTester() 5699 .mr(2) 5700 .nr(16) 5701 .kr(1) 5702 .sr(1) 5703 .m(2) 5704 .n(16) 5705 .k(k) 5706 .a_zero_point(0) 5707 .b_zero_point(0) 5708 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5709 } 5710 } 5711 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5712 5713 5714 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,k_eq_8)5715 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8) { 5716 TEST_REQUIRES_ARM_NEON_DOT; 5717 GemmMicrokernelTester() 5718 .mr(2) 5719 .nr(16) 5720 .kr(4) 5721 .sr(1) 5722 .m(2) 5723 .n(16) 5724 .k(8) 5725 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5726 } 5727 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,strided_cn)5728 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, strided_cn) { 5729 TEST_REQUIRES_ARM_NEON_DOT; 5730 GemmMicrokernelTester() 5731 .mr(2) 5732 
.nr(16) 5733 .kr(4) 5734 .sr(1) 5735 .m(2) 5736 .n(16) 5737 .k(8) 5738 .cn_stride(19) 5739 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5740 } 5741 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,k_eq_8_subtile)5742 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8_subtile) { 5743 TEST_REQUIRES_ARM_NEON_DOT; 5744 for (uint32_t n = 1; n <= 16; n++) { 5745 for (uint32_t m = 1; m <= 2; m++) { 5746 GemmMicrokernelTester() 5747 .mr(2) 5748 .nr(16) 5749 .kr(4) 5750 .sr(1) 5751 .m(m) 5752 .n(n) 5753 .k(8) 5754 .iterations(1) 5755 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5756 } 5757 } 5758 } 5759 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,k_eq_8_subtile_m)5760 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8_subtile_m) { 5761 TEST_REQUIRES_ARM_NEON_DOT; 5762 for (uint32_t m = 1; m <= 2; m++) { 5763 GemmMicrokernelTester() 5764 .mr(2) 5765 .nr(16) 5766 .kr(4) 5767 .sr(1) 5768 .m(m) 5769 .n(16) 5770 .k(8) 5771 .iterations(1) 5772 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5773 } 5774 } 5775 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,k_eq_8_subtile_n)5776 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8_subtile_n) { 5777 TEST_REQUIRES_ARM_NEON_DOT; 5778 for (uint32_t n = 1; n <= 16; n++) { 5779 GemmMicrokernelTester() 5780 .mr(2) 5781 .nr(16) 5782 .kr(4) 5783 .sr(1) 5784 .m(2) 5785 .n(n) 5786 .k(8) 5787 .iterations(1) 5788 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5789 } 5790 } 5791 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,k_lt_8)5792 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_lt_8) { 5793 TEST_REQUIRES_ARM_NEON_DOT; 5794 for (size_t k = 1; k < 8; k++) { 5795 GemmMicrokernelTester() 5796 .mr(2) 5797 .nr(16) 5798 .kr(4) 5799 
.sr(1) 5800 .m(2) 5801 .n(16) 5802 .k(k) 5803 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5804 } 5805 } 5806 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,k_lt_8_subtile)5807 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_lt_8_subtile) { 5808 TEST_REQUIRES_ARM_NEON_DOT; 5809 for (size_t k = 1; k < 8; k++) { 5810 for (uint32_t n = 1; n <= 16; n++) { 5811 for (uint32_t m = 1; m <= 2; m++) { 5812 GemmMicrokernelTester() 5813 .mr(2) 5814 .nr(16) 5815 .kr(4) 5816 .sr(1) 5817 .m(m) 5818 .n(n) 5819 .k(k) 5820 .iterations(1) 5821 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5822 } 5823 } 5824 } 5825 } 5826 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,k_gt_8)5827 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_gt_8) { 5828 TEST_REQUIRES_ARM_NEON_DOT; 5829 for (size_t k = 9; k < 16; k++) { 5830 GemmMicrokernelTester() 5831 .mr(2) 5832 .nr(16) 5833 .kr(4) 5834 .sr(1) 5835 .m(2) 5836 .n(16) 5837 .k(k) 5838 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5839 } 5840 } 5841 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,k_gt_8_subtile)5842 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_gt_8_subtile) { 5843 TEST_REQUIRES_ARM_NEON_DOT; 5844 for (size_t k = 9; k < 16; k++) { 5845 for (uint32_t n = 1; n <= 16; n++) { 5846 for (uint32_t m = 1; m <= 2; m++) { 5847 GemmMicrokernelTester() 5848 .mr(2) 5849 .nr(16) 5850 .kr(4) 5851 .sr(1) 5852 .m(m) 5853 .n(n) 5854 .k(k) 5855 .iterations(1) 5856 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5857 } 5858 } 5859 } 5860 } 5861 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,k_div_8)5862 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_div_8) { 5863 TEST_REQUIRES_ARM_NEON_DOT; 5864 for (size_t k = 16; k <= 80; k += 8) { 5865 
GemmMicrokernelTester() 5866 .mr(2) 5867 .nr(16) 5868 .kr(4) 5869 .sr(1) 5870 .m(2) 5871 .n(16) 5872 .k(k) 5873 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5874 } 5875 } 5876 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,k_div_8_subtile)5877 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_div_8_subtile) { 5878 TEST_REQUIRES_ARM_NEON_DOT; 5879 for (size_t k = 16; k <= 80; k += 8) { 5880 for (uint32_t n = 1; n <= 16; n++) { 5881 for (uint32_t m = 1; m <= 2; m++) { 5882 GemmMicrokernelTester() 5883 .mr(2) 5884 .nr(16) 5885 .kr(4) 5886 .sr(1) 5887 .m(m) 5888 .n(n) 5889 .k(k) 5890 .iterations(1) 5891 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5892 } 5893 } 5894 } 5895 } 5896 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,n_gt_16)5897 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16) { 5898 TEST_REQUIRES_ARM_NEON_DOT; 5899 for (uint32_t n = 17; n < 32; n++) { 5900 for (size_t k = 1; k <= 40; k += 9) { 5901 GemmMicrokernelTester() 5902 .mr(2) 5903 .nr(16) 5904 .kr(4) 5905 .sr(1) 5906 .m(2) 5907 .n(n) 5908 .k(k) 5909 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5910 } 5911 } 5912 } 5913 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,n_gt_16_strided_cn)5914 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16_strided_cn) { 5915 TEST_REQUIRES_ARM_NEON_DOT; 5916 for (uint32_t n = 17; n < 32; n++) { 5917 for (size_t k = 1; k <= 40; k += 9) { 5918 GemmMicrokernelTester() 5919 .mr(2) 5920 .nr(16) 5921 .kr(4) 5922 .sr(1) 5923 .m(2) 5924 .n(n) 5925 .k(k) 5926 .cn_stride(19) 5927 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5928 } 5929 } 5930 } 5931 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,n_gt_16_subtile)5932 
// n in [17, 31], k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m == 2, n in {32, 48}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m == 2, n in {32, 48}, k in {1, 10, 19, 28, 37}, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in {32, 48}, k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 2x16 tile, k in {1, 10, 19, 28, 37}, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks == 3, k in {1, 10, 19, 28, 37}, every (m, n) with m in [1, 2] and n in [1, 16]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks == 3, m == 2, n in [17, 31], k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// ks == 3, m == 2, n in {32, 48}, k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// cm_stride == 19, k in {1, 10, 19, 28, 37}, every (m, n) with m in [1, 2] and n in [1, 16]; one iteration per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 2x16 tile, ks == 3, k in {1, 10, 19, 28, 37}, with a_offset set to 83.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same configuration as a_offset, additionally sweeping zero_index over [0, 2).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
6137 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,qmin)6138 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, qmin) { 6139 TEST_REQUIRES_ARM_NEON_DOT; 6140 GemmMicrokernelTester() 6141 .mr(2) 6142 .nr(16) 6143 .kr(4) 6144 .sr(1) 6145 .m(2) 6146 .n(16) 6147 .k(8) 6148 .qmin(128) 6149 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6150 } 6151 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,qmax)6152 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, qmax) { 6153 TEST_REQUIRES_ARM_NEON_DOT; 6154 GemmMicrokernelTester() 6155 .mr(2) 6156 .nr(16) 6157 .kr(4) 6158 .sr(1) 6159 .m(2) 6160 .n(16) 6161 .k(8) 6162 .qmax(128) 6163 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6164 } 6165 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,strided_cm)6166 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, strided_cm) { 6167 TEST_REQUIRES_ARM_NEON_DOT; 6168 GemmMicrokernelTester() 6169 .mr(2) 6170 .nr(16) 6171 .kr(4) 6172 .sr(1) 6173 .m(2) 6174 .n(16) 6175 .k(8) 6176 .cm_stride(19) 6177 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6178 } 6179 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,no_a_zero_point)6180 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, no_a_zero_point) { 6181 TEST_REQUIRES_ARM_NEON_DOT; 6182 for (size_t k = 1; k <= 40; k += 9) { 6183 GemmMicrokernelTester() 6184 .mr(2) 6185 .nr(16) 6186 .kr(4) 6187 .sr(1) 6188 .m(2) 6189 .n(16) 6190 .k(k) 6191 .a_zero_point(0) 6192 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6193 } 6194 } 6195 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,no_b_zero_point)6196 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, no_b_zero_point) { 6197 TEST_REQUIRES_ARM_NEON_DOT; 6198 for (size_t k = 1; k <= 40; k += 9) { 6199 GemmMicrokernelTester() 6200 
.mr(2) 6201 .nr(16) 6202 .kr(4) 6203 .sr(1) 6204 .m(2) 6205 .n(16) 6206 .k(k) 6207 .b_zero_point(0) 6208 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6209 } 6210 } 6211 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT,no_zero_point)6212 TEST(QU8_IGEMM_MINMAX_RNDNU_2X16C4__NEONDOT, no_zero_point) { 6213 TEST_REQUIRES_ARM_NEON_DOT; 6214 for (size_t k = 1; k <= 40; k += 9) { 6215 GemmMicrokernelTester() 6216 .mr(2) 6217 .nr(16) 6218 .kr(4) 6219 .sr(1) 6220 .m(2) 6221 .n(16) 6222 .k(k) 6223 .a_zero_point(0) 6224 .b_zero_point(0) 6225 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6226 } 6227 } 6228 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 6229 6230 6231 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,k_eq_8)6232 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8) { 6233 TEST_REQUIRES_ARM_NEON_DOT; 6234 GemmMicrokernelTester() 6235 .mr(2) 6236 .nr(32) 6237 .kr(4) 6238 .sr(1) 6239 .m(2) 6240 .n(32) 6241 .k(8) 6242 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6243 } 6244 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,strided_cn)6245 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, strided_cn) { 6246 TEST_REQUIRES_ARM_NEON_DOT; 6247 GemmMicrokernelTester() 6248 .mr(2) 6249 .nr(32) 6250 .kr(4) 6251 .sr(1) 6252 .m(2) 6253 .n(32) 6254 .k(8) 6255 .cn_stride(37) 6256 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6257 } 6258 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,k_eq_8_subtile)6259 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8_subtile) { 6260 TEST_REQUIRES_ARM_NEON_DOT; 6261 for (uint32_t n = 1; n <= 32; n++) { 6262 for 
(uint32_t m = 1; m <= 2; m++) { 6263 GemmMicrokernelTester() 6264 .mr(2) 6265 .nr(32) 6266 .kr(4) 6267 .sr(1) 6268 .m(m) 6269 .n(n) 6270 .k(8) 6271 .iterations(1) 6272 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6273 } 6274 } 6275 } 6276 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,k_eq_8_subtile_m)6277 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8_subtile_m) { 6278 TEST_REQUIRES_ARM_NEON_DOT; 6279 for (uint32_t m = 1; m <= 2; m++) { 6280 GemmMicrokernelTester() 6281 .mr(2) 6282 .nr(32) 6283 .kr(4) 6284 .sr(1) 6285 .m(m) 6286 .n(32) 6287 .k(8) 6288 .iterations(1) 6289 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6290 } 6291 } 6292 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,k_eq_8_subtile_n)6293 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8_subtile_n) { 6294 TEST_REQUIRES_ARM_NEON_DOT; 6295 for (uint32_t n = 1; n <= 32; n++) { 6296 GemmMicrokernelTester() 6297 .mr(2) 6298 .nr(32) 6299 .kr(4) 6300 .sr(1) 6301 .m(2) 6302 .n(n) 6303 .k(8) 6304 .iterations(1) 6305 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6306 } 6307 } 6308 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,k_lt_8)6309 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_lt_8) { 6310 TEST_REQUIRES_ARM_NEON_DOT; 6311 for (size_t k = 1; k < 8; k++) { 6312 GemmMicrokernelTester() 6313 .mr(2) 6314 .nr(32) 6315 .kr(4) 6316 .sr(1) 6317 .m(2) 6318 .n(32) 6319 .k(k) 6320 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6321 } 6322 } 6323 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,k_lt_8_subtile)6324 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_lt_8_subtile) { 6325 TEST_REQUIRES_ARM_NEON_DOT; 6326 for (size_t k = 1; k < 8; k++) { 6327 for (uint32_t n = 1; n <= 32; n++) { 6328 for 
(uint32_t m = 1; m <= 2; m++) { 6329 GemmMicrokernelTester() 6330 .mr(2) 6331 .nr(32) 6332 .kr(4) 6333 .sr(1) 6334 .m(m) 6335 .n(n) 6336 .k(k) 6337 .iterations(1) 6338 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6339 } 6340 } 6341 } 6342 } 6343 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,k_gt_8)6344 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_gt_8) { 6345 TEST_REQUIRES_ARM_NEON_DOT; 6346 for (size_t k = 9; k < 16; k++) { 6347 GemmMicrokernelTester() 6348 .mr(2) 6349 .nr(32) 6350 .kr(4) 6351 .sr(1) 6352 .m(2) 6353 .n(32) 6354 .k(k) 6355 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6356 } 6357 } 6358 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,k_gt_8_subtile)6359 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_gt_8_subtile) { 6360 TEST_REQUIRES_ARM_NEON_DOT; 6361 for (size_t k = 9; k < 16; k++) { 6362 for (uint32_t n = 1; n <= 32; n++) { 6363 for (uint32_t m = 1; m <= 2; m++) { 6364 GemmMicrokernelTester() 6365 .mr(2) 6366 .nr(32) 6367 .kr(4) 6368 .sr(1) 6369 .m(m) 6370 .n(n) 6371 .k(k) 6372 .iterations(1) 6373 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6374 } 6375 } 6376 } 6377 } 6378 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,k_div_8)6379 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_div_8) { 6380 TEST_REQUIRES_ARM_NEON_DOT; 6381 for (size_t k = 16; k <= 80; k += 8) { 6382 GemmMicrokernelTester() 6383 .mr(2) 6384 .nr(32) 6385 .kr(4) 6386 .sr(1) 6387 .m(2) 6388 .n(32) 6389 .k(k) 6390 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6391 } 6392 } 6393 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,k_div_8_subtile)6394 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_div_8_subtile) { 6395 TEST_REQUIRES_ARM_NEON_DOT; 6396 for (size_t k = 16; 
k <= 80; k += 8) { 6397 for (uint32_t n = 1; n <= 32; n++) { 6398 for (uint32_t m = 1; m <= 2; m++) { 6399 GemmMicrokernelTester() 6400 .mr(2) 6401 .nr(32) 6402 .kr(4) 6403 .sr(1) 6404 .m(m) 6405 .n(n) 6406 .k(k) 6407 .iterations(1) 6408 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6409 } 6410 } 6411 } 6412 } 6413 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,n_gt_32)6414 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32) { 6415 TEST_REQUIRES_ARM_NEON_DOT; 6416 for (uint32_t n = 33; n < 64; n++) { 6417 for (size_t k = 1; k <= 40; k += 9) { 6418 GemmMicrokernelTester() 6419 .mr(2) 6420 .nr(32) 6421 .kr(4) 6422 .sr(1) 6423 .m(2) 6424 .n(n) 6425 .k(k) 6426 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6427 } 6428 } 6429 } 6430 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,n_gt_32_strided_cn)6431 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32_strided_cn) { 6432 TEST_REQUIRES_ARM_NEON_DOT; 6433 for (uint32_t n = 33; n < 64; n++) { 6434 for (size_t k = 1; k <= 40; k += 9) { 6435 GemmMicrokernelTester() 6436 .mr(2) 6437 .nr(32) 6438 .kr(4) 6439 .sr(1) 6440 .m(2) 6441 .n(n) 6442 .k(k) 6443 .cn_stride(37) 6444 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6445 } 6446 } 6447 } 6448 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,n_gt_32_subtile)6449 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32_subtile) { 6450 TEST_REQUIRES_ARM_NEON_DOT; 6451 for (uint32_t n = 33; n < 64; n++) { 6452 for (size_t k = 1; k <= 40; k += 9) { 6453 for (uint32_t m = 1; m <= 2; m++) { 6454 GemmMicrokernelTester() 6455 .mr(2) 6456 .nr(32) 6457 .kr(4) 6458 .sr(1) 6459 .m(m) 6460 .n(n) 6461 .k(k) 6462 .iterations(1) 6463 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 
6464 } 6465 } 6466 } 6467 } 6468 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,n_div_32)6469 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32) { 6470 TEST_REQUIRES_ARM_NEON_DOT; 6471 for (uint32_t n = 64; n <= 96; n += 32) { 6472 for (size_t k = 1; k <= 40; k += 9) { 6473 GemmMicrokernelTester() 6474 .mr(2) 6475 .nr(32) 6476 .kr(4) 6477 .sr(1) 6478 .m(2) 6479 .n(n) 6480 .k(k) 6481 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6482 } 6483 } 6484 } 6485 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,n_div_32_strided_cn)6486 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32_strided_cn) { 6487 TEST_REQUIRES_ARM_NEON_DOT; 6488 for (uint32_t n = 64; n <= 96; n += 32) { 6489 for (size_t k = 1; k <= 40; k += 9) { 6490 GemmMicrokernelTester() 6491 .mr(2) 6492 .nr(32) 6493 .kr(4) 6494 .sr(1) 6495 .m(2) 6496 .n(n) 6497 .k(k) 6498 .cn_stride(37) 6499 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6500 } 6501 } 6502 } 6503 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,n_div_32_subtile)6504 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32_subtile) { 6505 TEST_REQUIRES_ARM_NEON_DOT; 6506 for (uint32_t n = 64; n <= 96; n += 32) { 6507 for (size_t k = 1; k <= 40; k += 9) { 6508 for (uint32_t m = 1; m <= 2; m++) { 6509 GemmMicrokernelTester() 6510 .mr(2) 6511 .nr(32) 6512 .kr(4) 6513 .sr(1) 6514 .m(m) 6515 .n(n) 6516 .k(k) 6517 .iterations(1) 6518 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6519 } 6520 } 6521 } 6522 } 6523 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,small_kernel)6524 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, small_kernel) { 6525 TEST_REQUIRES_ARM_NEON_DOT; 6526 for (size_t k = 1; k <= 40; k += 9) { 6527 GemmMicrokernelTester() 6528 .mr(2) 6529 .nr(32) 6530 .kr(4) 6531 .sr(1) 6532 .m(2) 6533 .n(32) 6534 .k(k) 
6535 .ks(3) 6536 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6537 } 6538 } 6539 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,small_kernel_subtile)6540 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, small_kernel_subtile) { 6541 TEST_REQUIRES_ARM_NEON_DOT; 6542 for (size_t k = 1; k <= 40; k += 9) { 6543 for (uint32_t n = 1; n <= 32; n++) { 6544 for (uint32_t m = 1; m <= 2; m++) { 6545 GemmMicrokernelTester() 6546 .mr(2) 6547 .nr(32) 6548 .kr(4) 6549 .sr(1) 6550 .m(m) 6551 .n(n) 6552 .k(k) 6553 .ks(3) 6554 .iterations(1) 6555 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6556 } 6557 } 6558 } 6559 } 6560 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,n_gt_32_small_kernel)6561 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32_small_kernel) { 6562 TEST_REQUIRES_ARM_NEON_DOT; 6563 for (uint32_t n = 33; n < 64; n++) { 6564 for (size_t k = 1; k <= 40; k += 9) { 6565 GemmMicrokernelTester() 6566 .mr(2) 6567 .nr(32) 6568 .kr(4) 6569 .sr(1) 6570 .m(2) 6571 .n(n) 6572 .k(k) 6573 .ks(3) 6574 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6575 } 6576 } 6577 } 6578 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,n_div_32_small_kernel)6579 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32_small_kernel) { 6580 TEST_REQUIRES_ARM_NEON_DOT; 6581 for (uint32_t n = 64; n <= 96; n += 32) { 6582 for (size_t k = 1; k <= 40; k += 9) { 6583 GemmMicrokernelTester() 6584 .mr(2) 6585 .nr(32) 6586 .kr(4) 6587 .sr(1) 6588 .m(2) 6589 .n(n) 6590 .k(k) 6591 .ks(3) 6592 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6593 } 6594 } 6595 } 6596 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,strided_cm_subtile)6597 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, strided_cm_subtile) { 6598 
TEST_REQUIRES_ARM_NEON_DOT; 6599 for (size_t k = 1; k <= 40; k += 9) { 6600 for (uint32_t n = 1; n <= 32; n++) { 6601 for (uint32_t m = 1; m <= 2; m++) { 6602 GemmMicrokernelTester() 6603 .mr(2) 6604 .nr(32) 6605 .kr(4) 6606 .sr(1) 6607 .m(m) 6608 .n(n) 6609 .k(k) 6610 .cm_stride(37) 6611 .iterations(1) 6612 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6613 } 6614 } 6615 } 6616 } 6617 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,a_offset)6618 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, a_offset) { 6619 TEST_REQUIRES_ARM_NEON_DOT; 6620 for (size_t k = 1; k <= 40; k += 9) { 6621 GemmMicrokernelTester() 6622 .mr(2) 6623 .nr(32) 6624 .kr(4) 6625 .sr(1) 6626 .m(2) 6627 .n(32) 6628 .k(k) 6629 .ks(3) 6630 .a_offset(83) 6631 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6632 } 6633 } 6634 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,zero)6635 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, zero) { 6636 TEST_REQUIRES_ARM_NEON_DOT; 6637 for (size_t k = 1; k <= 40; k += 9) { 6638 for (uint32_t mz = 0; mz < 2; mz++) { 6639 GemmMicrokernelTester() 6640 .mr(2) 6641 .nr(32) 6642 .kr(4) 6643 .sr(1) 6644 .m(2) 6645 .n(32) 6646 .k(k) 6647 .ks(3) 6648 .a_offset(83) 6649 .zero_index(mz) 6650 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6651 } 6652 } 6653 } 6654 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,qmin)6655 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, qmin) { 6656 TEST_REQUIRES_ARM_NEON_DOT; 6657 GemmMicrokernelTester() 6658 .mr(2) 6659 .nr(32) 6660 .kr(4) 6661 .sr(1) 6662 .m(2) 6663 .n(32) 6664 .k(8) 6665 .qmin(128) 6666 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6667 } 6668 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,qmax)6669 
TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, qmax) { 6670 TEST_REQUIRES_ARM_NEON_DOT; 6671 GemmMicrokernelTester() 6672 .mr(2) 6673 .nr(32) 6674 .kr(4) 6675 .sr(1) 6676 .m(2) 6677 .n(32) 6678 .k(8) 6679 .qmax(128) 6680 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6681 } 6682 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,strided_cm)6683 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, strided_cm) { 6684 TEST_REQUIRES_ARM_NEON_DOT; 6685 GemmMicrokernelTester() 6686 .mr(2) 6687 .nr(32) 6688 .kr(4) 6689 .sr(1) 6690 .m(2) 6691 .n(32) 6692 .k(8) 6693 .cm_stride(37) 6694 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6695 } 6696 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,no_a_zero_point)6697 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, no_a_zero_point) { 6698 TEST_REQUIRES_ARM_NEON_DOT; 6699 for (size_t k = 1; k <= 40; k += 9) { 6700 GemmMicrokernelTester() 6701 .mr(2) 6702 .nr(32) 6703 .kr(4) 6704 .sr(1) 6705 .m(2) 6706 .n(32) 6707 .k(k) 6708 .a_zero_point(0) 6709 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6710 } 6711 } 6712 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,no_b_zero_point)6713 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, no_b_zero_point) { 6714 TEST_REQUIRES_ARM_NEON_DOT; 6715 for (size_t k = 1; k <= 40; k += 9) { 6716 GemmMicrokernelTester() 6717 .mr(2) 6718 .nr(32) 6719 .kr(4) 6720 .sr(1) 6721 .m(2) 6722 .n(32) 6723 .k(k) 6724 .b_zero_point(0) 6725 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6726 } 6727 } 6728 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT,no_zero_point)6729 TEST(QU8_IGEMM_MINMAX_RNDNU_2X32C4__NEONDOT, no_zero_point) { 6730 TEST_REQUIRES_ARM_NEON_DOT; 6731 for (size_t k = 1; k <= 40; k += 9) { 6732 
GemmMicrokernelTester() 6733 .mr(2) 6734 .nr(32) 6735 .kr(4) 6736 .sr(1) 6737 .m(2) 6738 .n(32) 6739 .k(k) 6740 .a_zero_point(0) 6741 .b_zero_point(0) 6742 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6743 } 6744 } 6745 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 6746 6747 6748 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,k_eq_8)6749 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8) { 6750 TEST_REQUIRES_ARM_NEON; 6751 GemmMicrokernelTester() 6752 .mr(3) 6753 .nr(16) 6754 .kr(1) 6755 .sr(1) 6756 .m(3) 6757 .n(16) 6758 .k(8) 6759 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6760 } 6761 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,strided_cn)6762 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cn) { 6763 TEST_REQUIRES_ARM_NEON; 6764 GemmMicrokernelTester() 6765 .mr(3) 6766 .nr(16) 6767 .kr(1) 6768 .sr(1) 6769 .m(3) 6770 .n(16) 6771 .k(8) 6772 .cn_stride(19) 6773 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6774 } 6775 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,k_eq_8_subtile)6776 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile) { 6777 TEST_REQUIRES_ARM_NEON; 6778 for (uint32_t n = 1; n <= 16; n++) { 6779 for (uint32_t m = 1; m <= 3; m++) { 6780 GemmMicrokernelTester() 6781 .mr(3) 6782 .nr(16) 6783 .kr(1) 6784 .sr(1) 6785 .m(m) 6786 .n(n) 6787 .k(8) 6788 .iterations(1) 6789 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6790 } 6791 } 6792 } 6793 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,k_eq_8_subtile_m)6794 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 6795 
TEST_REQUIRES_ARM_NEON; 6796 for (uint32_t m = 1; m <= 3; m++) { 6797 GemmMicrokernelTester() 6798 .mr(3) 6799 .nr(16) 6800 .kr(1) 6801 .sr(1) 6802 .m(m) 6803 .n(16) 6804 .k(8) 6805 .iterations(1) 6806 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6807 } 6808 } 6809 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,k_eq_8_subtile_n)6810 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 6811 TEST_REQUIRES_ARM_NEON; 6812 for (uint32_t n = 1; n <= 16; n++) { 6813 GemmMicrokernelTester() 6814 .mr(3) 6815 .nr(16) 6816 .kr(1) 6817 .sr(1) 6818 .m(3) 6819 .n(n) 6820 .k(8) 6821 .iterations(1) 6822 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6823 } 6824 } 6825 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,k_lt_8)6826 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8) { 6827 TEST_REQUIRES_ARM_NEON; 6828 for (size_t k = 1; k < 8; k++) { 6829 GemmMicrokernelTester() 6830 .mr(3) 6831 .nr(16) 6832 .kr(1) 6833 .sr(1) 6834 .m(3) 6835 .n(16) 6836 .k(k) 6837 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6838 } 6839 } 6840 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,k_lt_8_subtile)6841 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8_subtile) { 6842 TEST_REQUIRES_ARM_NEON; 6843 for (size_t k = 1; k < 8; k++) { 6844 for (uint32_t n = 1; n <= 16; n++) { 6845 for (uint32_t m = 1; m <= 3; m++) { 6846 GemmMicrokernelTester() 6847 .mr(3) 6848 .nr(16) 6849 .kr(1) 6850 .sr(1) 6851 .m(m) 6852 .n(n) 6853 .k(k) 6854 .iterations(1) 6855 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6856 } 6857 } 6858 } 6859 } 6860 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,k_gt_8)6861 
// Full 3x16 tile, k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Every (m, n) with m in [1, 3] and n in [1, 16], k in [9, 15], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full 3x16 tile, k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Every (m, n) with m in [1, 3] and n in [1, 16], k = 16, 24, ..., 80, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 3, n in [17, 31], k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m = 3, n in [17, 31], k = 1, 10, ..., 37, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m in [1, 3], n in [17, 31], k = 1, 10, ..., 37, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 3, n = 32 and 48, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m = 3, n = 32 and 48, k = 1, 10, ..., 37, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m in [1, 3], n = 32 and 48, k = 1, 10, ..., 37, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full 3x16 tile, k = 1, 10, ..., 37, with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel_subtile) { 7058 TEST_REQUIRES_ARM_NEON; 7059 for (size_t k = 1; k <= 40; k += 9) { 7060 for (uint32_t n = 1; n <= 16; n++) { 7061 for (uint32_t m = 1; m <= 3; m++) { 7062 GemmMicrokernelTester() 7063 .mr(3) 7064 .nr(16) 7065 .kr(1) 7066 .sr(1) 7067 .m(m) 7068 .n(n) 7069 .k(k) 7070 .ks(3) 7071 .iterations(1) 7072 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7073 } 7074 } 7075 } 7076 } 7077 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,n_gt_16_small_kernel)7078 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_small_kernel) { 7079 TEST_REQUIRES_ARM_NEON; 7080 for (uint32_t n = 17; n < 32; n++) { 7081 for (size_t k = 1; k <= 40; k += 9) { 7082 GemmMicrokernelTester() 7083 .mr(3) 7084 .nr(16) 7085 .kr(1) 7086 .sr(1) 7087 .m(3) 7088 .n(n) 7089 .k(k) 7090 .ks(3) 7091 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7092 } 7093 } 7094 } 7095 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,n_div_16_small_kernel)7096 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_small_kernel) { 7097 TEST_REQUIRES_ARM_NEON; 7098 for (uint32_t n = 32; n <= 48; n += 16) { 7099 for (size_t k = 1; k <= 40; k += 9) { 7100 GemmMicrokernelTester() 7101 .mr(3) 7102 .nr(16) 7103 .kr(1) 7104 .sr(1) 7105 .m(3) 7106 .n(n) 7107 .k(k) 7108 .ks(3) 7109 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7110 } 7111 } 7112 } 7113 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,strided_cm_subtile)7114 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm_subtile) { 7115 TEST_REQUIRES_ARM_NEON; 7116 for (size_t k = 1; k <= 40; k += 9) { 7117 for (uint32_t n = 1; n <= 16; n++) { 7118 for (uint32_t m = 1; m <= 3; m++) { 7119 GemmMicrokernelTester() 7120 .mr(3) 7121 
.nr(16) 7122 .kr(1) 7123 .sr(1) 7124 .m(m) 7125 .n(n) 7126 .k(k) 7127 .cm_stride(19) 7128 .iterations(1) 7129 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7130 } 7131 } 7132 } 7133 } 7134 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,a_offset)7135 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, a_offset) { 7136 TEST_REQUIRES_ARM_NEON; 7137 for (size_t k = 1; k <= 40; k += 9) { 7138 GemmMicrokernelTester() 7139 .mr(3) 7140 .nr(16) 7141 .kr(1) 7142 .sr(1) 7143 .m(3) 7144 .n(16) 7145 .k(k) 7146 .ks(3) 7147 .a_offset(127) 7148 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7149 } 7150 } 7151 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,zero)7152 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, zero) { 7153 TEST_REQUIRES_ARM_NEON; 7154 for (size_t k = 1; k <= 40; k += 9) { 7155 for (uint32_t mz = 0; mz < 3; mz++) { 7156 GemmMicrokernelTester() 7157 .mr(3) 7158 .nr(16) 7159 .kr(1) 7160 .sr(1) 7161 .m(3) 7162 .n(16) 7163 .k(k) 7164 .ks(3) 7165 .a_offset(127) 7166 .zero_index(mz) 7167 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7168 } 7169 } 7170 } 7171 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,qmin)7172 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmin) { 7173 TEST_REQUIRES_ARM_NEON; 7174 GemmMicrokernelTester() 7175 .mr(3) 7176 .nr(16) 7177 .kr(1) 7178 .sr(1) 7179 .m(3) 7180 .n(16) 7181 .k(8) 7182 .qmin(128) 7183 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7184 } 7185 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,qmax)7186 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmax) { 7187 TEST_REQUIRES_ARM_NEON; 7188 GemmMicrokernelTester() 7189 .mr(3) 7190 .nr(16) 7191 .kr(1) 7192 .sr(1) 7193 .m(3) 
7194 .n(16) 7195 .k(8) 7196 .qmax(128) 7197 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7198 } 7199 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,strided_cm)7200 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm) { 7201 TEST_REQUIRES_ARM_NEON; 7202 GemmMicrokernelTester() 7203 .mr(3) 7204 .nr(16) 7205 .kr(1) 7206 .sr(1) 7207 .m(3) 7208 .n(16) 7209 .k(8) 7210 .cm_stride(19) 7211 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7212 } 7213 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,no_a_zero_point)7214 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, no_a_zero_point) { 7215 TEST_REQUIRES_ARM_NEON; 7216 for (size_t k = 1; k <= 40; k += 9) { 7217 GemmMicrokernelTester() 7218 .mr(3) 7219 .nr(16) 7220 .kr(1) 7221 .sr(1) 7222 .m(3) 7223 .n(16) 7224 .k(k) 7225 .a_zero_point(0) 7226 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7227 } 7228 } 7229 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,no_b_zero_point)7230 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, no_b_zero_point) { 7231 TEST_REQUIRES_ARM_NEON; 7232 for (size_t k = 1; k <= 40; k += 9) { 7233 GemmMicrokernelTester() 7234 .mr(3) 7235 .nr(16) 7236 .kr(1) 7237 .sr(1) 7238 .m(3) 7239 .n(16) 7240 .k(k) 7241 .b_zero_point(0) 7242 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7243 } 7244 } 7245 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE,no_zero_point)7246 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, no_zero_point) { 7247 TEST_REQUIRES_ARM_NEON; 7248 for (size_t k = 1; k <= 40; k += 9) { 7249 GemmMicrokernelTester() 7250 .mr(3) 7251 .nr(16) 7252 .kr(1) 7253 .sr(1) 7254 .m(3) 7255 .n(16) 7256 .k(k) 7257 .a_zero_point(0) 7258 .b_zero_point(0) 
7259 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7260 } 7261 } 7262 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7263 7264 7265 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,k_eq_8)7266 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8) { 7267 TEST_REQUIRES_ARM_NEON_DOT; 7268 GemmMicrokernelTester() 7269 .mr(3) 7270 .nr(16) 7271 .kr(4) 7272 .sr(1) 7273 .m(3) 7274 .n(16) 7275 .k(8) 7276 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7277 } 7278 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,strided_cn)7279 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, strided_cn) { 7280 TEST_REQUIRES_ARM_NEON_DOT; 7281 GemmMicrokernelTester() 7282 .mr(3) 7283 .nr(16) 7284 .kr(4) 7285 .sr(1) 7286 .m(3) 7287 .n(16) 7288 .k(8) 7289 .cn_stride(19) 7290 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7291 } 7292 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,k_eq_8_subtile)7293 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8_subtile) { 7294 TEST_REQUIRES_ARM_NEON_DOT; 7295 for (uint32_t n = 1; n <= 16; n++) { 7296 for (uint32_t m = 1; m <= 3; m++) { 7297 GemmMicrokernelTester() 7298 .mr(3) 7299 .nr(16) 7300 .kr(4) 7301 .sr(1) 7302 .m(m) 7303 .n(n) 7304 .k(8) 7305 .iterations(1) 7306 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7307 } 7308 } 7309 } 7310 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,k_eq_8_subtile_m)7311 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8_subtile_m) { 7312 TEST_REQUIRES_ARM_NEON_DOT; 7313 for (uint32_t m = 1; m <= 3; m++) { 7314 GemmMicrokernelTester() 7315 .mr(3) 7316 .nr(16) 7317 .kr(4) 7318 .sr(1) 7319 .m(m) 7320 .n(16) 7321 .k(8) 7322 
.iterations(1) 7323 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7324 } 7325 } 7326 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,k_eq_8_subtile_n)7327 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8_subtile_n) { 7328 TEST_REQUIRES_ARM_NEON_DOT; 7329 for (uint32_t n = 1; n <= 16; n++) { 7330 GemmMicrokernelTester() 7331 .mr(3) 7332 .nr(16) 7333 .kr(4) 7334 .sr(1) 7335 .m(3) 7336 .n(n) 7337 .k(8) 7338 .iterations(1) 7339 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7340 } 7341 } 7342 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,k_lt_8)7343 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_lt_8) { 7344 TEST_REQUIRES_ARM_NEON_DOT; 7345 for (size_t k = 1; k < 8; k++) { 7346 GemmMicrokernelTester() 7347 .mr(3) 7348 .nr(16) 7349 .kr(4) 7350 .sr(1) 7351 .m(3) 7352 .n(16) 7353 .k(k) 7354 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7355 } 7356 } 7357 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,k_lt_8_subtile)7358 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_lt_8_subtile) { 7359 TEST_REQUIRES_ARM_NEON_DOT; 7360 for (size_t k = 1; k < 8; k++) { 7361 for (uint32_t n = 1; n <= 16; n++) { 7362 for (uint32_t m = 1; m <= 3; m++) { 7363 GemmMicrokernelTester() 7364 .mr(3) 7365 .nr(16) 7366 .kr(4) 7367 .sr(1) 7368 .m(m) 7369 .n(n) 7370 .k(k) 7371 .iterations(1) 7372 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7373 } 7374 } 7375 } 7376 } 7377 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,k_gt_8)7378 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_gt_8) { 7379 TEST_REQUIRES_ARM_NEON_DOT; 7380 for (size_t k = 9; k < 16; k++) { 7381 GemmMicrokernelTester() 7382 .mr(3) 7383 .nr(16) 7384 .kr(4) 7385 .sr(1) 7386 .m(3) 7387 .n(16) 7388 .k(k) 7389 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7390 } 7391 } 7392 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,k_gt_8_subtile)7393 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_gt_8_subtile) { 7394 TEST_REQUIRES_ARM_NEON_DOT; 7395 for (size_t k = 9; k < 16; k++) { 7396 for (uint32_t n = 1; n <= 16; n++) { 7397 for (uint32_t m = 1; m <= 3; m++) { 7398 GemmMicrokernelTester() 7399 .mr(3) 7400 .nr(16) 7401 .kr(4) 7402 .sr(1) 7403 .m(m) 7404 .n(n) 7405 .k(k) 7406 .iterations(1) 7407 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7408 } 7409 } 7410 } 7411 } 7412 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,k_div_8)7413 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_div_8) { 7414 TEST_REQUIRES_ARM_NEON_DOT; 7415 for (size_t k = 16; k <= 80; k += 8) { 7416 GemmMicrokernelTester() 7417 .mr(3) 7418 .nr(16) 7419 .kr(4) 7420 .sr(1) 7421 .m(3) 7422 .n(16) 7423 .k(k) 7424 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7425 } 7426 } 7427 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,k_div_8_subtile)7428 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_div_8_subtile) { 7429 TEST_REQUIRES_ARM_NEON_DOT; 7430 for (size_t k = 16; k <= 80; k += 8) { 7431 for (uint32_t n = 1; n <= 16; n++) { 7432 for (uint32_t m = 1; m <= 3; m++) { 7433 GemmMicrokernelTester() 7434 .mr(3) 7435 .nr(16) 7436 .kr(4) 7437 .sr(1) 7438 .m(m) 7439 .n(n) 7440 .k(k) 7441 .iterations(1) 7442 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7443 } 7444 } 7445 } 7446 } 7447 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,n_gt_16)7448 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16) { 7449 TEST_REQUIRES_ARM_NEON_DOT; 7450 for (uint32_t n = 17; n < 32; n++) { 7451 for (size_t k = 1; k <= 40; k += 9) 
{ 7452 GemmMicrokernelTester() 7453 .mr(3) 7454 .nr(16) 7455 .kr(4) 7456 .sr(1) 7457 .m(3) 7458 .n(n) 7459 .k(k) 7460 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7461 } 7462 } 7463 } 7464 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,n_gt_16_strided_cn)7465 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16_strided_cn) { 7466 TEST_REQUIRES_ARM_NEON_DOT; 7467 for (uint32_t n = 17; n < 32; n++) { 7468 for (size_t k = 1; k <= 40; k += 9) { 7469 GemmMicrokernelTester() 7470 .mr(3) 7471 .nr(16) 7472 .kr(4) 7473 .sr(1) 7474 .m(3) 7475 .n(n) 7476 .k(k) 7477 .cn_stride(19) 7478 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7479 } 7480 } 7481 } 7482 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,n_gt_16_subtile)7483 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16_subtile) { 7484 TEST_REQUIRES_ARM_NEON_DOT; 7485 for (uint32_t n = 17; n < 32; n++) { 7486 for (size_t k = 1; k <= 40; k += 9) { 7487 for (uint32_t m = 1; m <= 3; m++) { 7488 GemmMicrokernelTester() 7489 .mr(3) 7490 .nr(16) 7491 .kr(4) 7492 .sr(1) 7493 .m(m) 7494 .n(n) 7495 .k(k) 7496 .iterations(1) 7497 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7498 } 7499 } 7500 } 7501 } 7502 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,n_div_16)7503 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16) { 7504 TEST_REQUIRES_ARM_NEON_DOT; 7505 for (uint32_t n = 32; n <= 48; n += 16) { 7506 for (size_t k = 1; k <= 40; k += 9) { 7507 GemmMicrokernelTester() 7508 .mr(3) 7509 .nr(16) 7510 .kr(4) 7511 .sr(1) 7512 .m(3) 7513 .n(n) 7514 .k(k) 7515 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7516 } 7517 } 7518 } 7519 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,n_div_16_strided_cn)7520 
TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16_strided_cn) { 7521 TEST_REQUIRES_ARM_NEON_DOT; 7522 for (uint32_t n = 32; n <= 48; n += 16) { 7523 for (size_t k = 1; k <= 40; k += 9) { 7524 GemmMicrokernelTester() 7525 .mr(3) 7526 .nr(16) 7527 .kr(4) 7528 .sr(1) 7529 .m(3) 7530 .n(n) 7531 .k(k) 7532 .cn_stride(19) 7533 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7534 } 7535 } 7536 } 7537 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,n_div_16_subtile)7538 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16_subtile) { 7539 TEST_REQUIRES_ARM_NEON_DOT; 7540 for (uint32_t n = 32; n <= 48; n += 16) { 7541 for (size_t k = 1; k <= 40; k += 9) { 7542 for (uint32_t m = 1; m <= 3; m++) { 7543 GemmMicrokernelTester() 7544 .mr(3) 7545 .nr(16) 7546 .kr(4) 7547 .sr(1) 7548 .m(m) 7549 .n(n) 7550 .k(k) 7551 .iterations(1) 7552 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7553 } 7554 } 7555 } 7556 } 7557 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,small_kernel)7558 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, small_kernel) { 7559 TEST_REQUIRES_ARM_NEON_DOT; 7560 for (size_t k = 1; k <= 40; k += 9) { 7561 GemmMicrokernelTester() 7562 .mr(3) 7563 .nr(16) 7564 .kr(4) 7565 .sr(1) 7566 .m(3) 7567 .n(16) 7568 .k(k) 7569 .ks(3) 7570 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7571 } 7572 } 7573 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,small_kernel_subtile)7574 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, small_kernel_subtile) { 7575 TEST_REQUIRES_ARM_NEON_DOT; 7576 for (size_t k = 1; k <= 40; k += 9) { 7577 for (uint32_t n = 1; n <= 16; n++) { 7578 for (uint32_t m = 1; m <= 3; m++) { 7579 GemmMicrokernelTester() 7580 .mr(3) 7581 .nr(16) 7582 .kr(4) 7583 .sr(1) 7584 .m(m) 7585 .n(n) 7586 .k(k) 7587 .ks(3) 7588 .iterations(1) 7589 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7590 } 7591 } 7592 } 7593 } 7594 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,n_gt_16_small_kernel)7595 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16_small_kernel) { 7596 TEST_REQUIRES_ARM_NEON_DOT; 7597 for (uint32_t n = 17; n < 32; n++) { 7598 for (size_t k = 1; k <= 40; k += 9) { 7599 GemmMicrokernelTester() 7600 .mr(3) 7601 .nr(16) 7602 .kr(4) 7603 .sr(1) 7604 .m(3) 7605 .n(n) 7606 .k(k) 7607 .ks(3) 7608 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7609 } 7610 } 7611 } 7612 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,n_div_16_small_kernel)7613 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16_small_kernel) { 7614 TEST_REQUIRES_ARM_NEON_DOT; 7615 for (uint32_t n = 32; n <= 48; n += 16) { 7616 for (size_t k = 1; k <= 40; k += 9) { 7617 GemmMicrokernelTester() 7618 .mr(3) 7619 .nr(16) 7620 .kr(4) 7621 .sr(1) 7622 .m(3) 7623 .n(n) 7624 .k(k) 7625 .ks(3) 7626 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7627 } 7628 } 7629 } 7630 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,strided_cm_subtile)7631 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, strided_cm_subtile) { 7632 TEST_REQUIRES_ARM_NEON_DOT; 7633 for (size_t k = 1; k <= 40; k += 9) { 7634 for (uint32_t n = 1; n <= 16; n++) { 7635 for (uint32_t m = 1; m <= 3; m++) { 7636 GemmMicrokernelTester() 7637 .mr(3) 7638 .nr(16) 7639 .kr(4) 7640 .sr(1) 7641 .m(m) 7642 .n(n) 7643 .k(k) 7644 .cm_stride(19) 7645 .iterations(1) 7646 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7647 } 7648 } 7649 } 7650 } 7651 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,a_offset)7652 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, a_offset) { 7653 
TEST_REQUIRES_ARM_NEON_DOT; 7654 for (size_t k = 1; k <= 40; k += 9) { 7655 GemmMicrokernelTester() 7656 .mr(3) 7657 .nr(16) 7658 .kr(4) 7659 .sr(1) 7660 .m(3) 7661 .n(16) 7662 .k(k) 7663 .ks(3) 7664 .a_offset(127) 7665 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7666 } 7667 } 7668 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,zero)7669 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, zero) { 7670 TEST_REQUIRES_ARM_NEON_DOT; 7671 for (size_t k = 1; k <= 40; k += 9) { 7672 for (uint32_t mz = 0; mz < 3; mz++) { 7673 GemmMicrokernelTester() 7674 .mr(3) 7675 .nr(16) 7676 .kr(4) 7677 .sr(1) 7678 .m(3) 7679 .n(16) 7680 .k(k) 7681 .ks(3) 7682 .a_offset(127) 7683 .zero_index(mz) 7684 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7685 } 7686 } 7687 } 7688 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,qmin)7689 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, qmin) { 7690 TEST_REQUIRES_ARM_NEON_DOT; 7691 GemmMicrokernelTester() 7692 .mr(3) 7693 .nr(16) 7694 .kr(4) 7695 .sr(1) 7696 .m(3) 7697 .n(16) 7698 .k(8) 7699 .qmin(128) 7700 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7701 } 7702 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,qmax)7703 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, qmax) { 7704 TEST_REQUIRES_ARM_NEON_DOT; 7705 GemmMicrokernelTester() 7706 .mr(3) 7707 .nr(16) 7708 .kr(4) 7709 .sr(1) 7710 .m(3) 7711 .n(16) 7712 .k(8) 7713 .qmax(128) 7714 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7715 } 7716 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,strided_cm)7717 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, strided_cm) { 7718 TEST_REQUIRES_ARM_NEON_DOT; 7719 GemmMicrokernelTester() 7720 .mr(3) 7721 .nr(16) 7722 .kr(4) 7723 .sr(1) 7724 .m(3) 7725 .n(16) 
7726 .k(8) 7727 .cm_stride(19) 7728 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7729 } 7730 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,no_a_zero_point)7731 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, no_a_zero_point) { 7732 TEST_REQUIRES_ARM_NEON_DOT; 7733 for (size_t k = 1; k <= 40; k += 9) { 7734 GemmMicrokernelTester() 7735 .mr(3) 7736 .nr(16) 7737 .kr(4) 7738 .sr(1) 7739 .m(3) 7740 .n(16) 7741 .k(k) 7742 .a_zero_point(0) 7743 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7744 } 7745 } 7746 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,no_b_zero_point)7747 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, no_b_zero_point) { 7748 TEST_REQUIRES_ARM_NEON_DOT; 7749 for (size_t k = 1; k <= 40; k += 9) { 7750 GemmMicrokernelTester() 7751 .mr(3) 7752 .nr(16) 7753 .kr(4) 7754 .sr(1) 7755 .m(3) 7756 .n(16) 7757 .k(k) 7758 .b_zero_point(0) 7759 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7760 } 7761 } 7762 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT,no_zero_point)7763 TEST(QU8_IGEMM_MINMAX_RNDNU_3X16C4__NEONDOT, no_zero_point) { 7764 TEST_REQUIRES_ARM_NEON_DOT; 7765 for (size_t k = 1; k <= 40; k += 9) { 7766 GemmMicrokernelTester() 7767 .mr(3) 7768 .nr(16) 7769 .kr(4) 7770 .sr(1) 7771 .m(3) 7772 .n(16) 7773 .k(k) 7774 .a_zero_point(0) 7775 .b_zero_point(0) 7776 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7777 } 7778 } 7779 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 7780 7781 7782 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,k_eq_8)7783 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8) { 7784 
TEST_REQUIRES_ARM_NEON_DOT; 7785 GemmMicrokernelTester() 7786 .mr(3) 7787 .nr(32) 7788 .kr(4) 7789 .sr(1) 7790 .m(3) 7791 .n(32) 7792 .k(8) 7793 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7794 } 7795 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,strided_cn)7796 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, strided_cn) { 7797 TEST_REQUIRES_ARM_NEON_DOT; 7798 GemmMicrokernelTester() 7799 .mr(3) 7800 .nr(32) 7801 .kr(4) 7802 .sr(1) 7803 .m(3) 7804 .n(32) 7805 .k(8) 7806 .cn_stride(37) 7807 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7808 } 7809 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,k_eq_8_subtile)7810 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8_subtile) { 7811 TEST_REQUIRES_ARM_NEON_DOT; 7812 for (uint32_t n = 1; n <= 32; n++) { 7813 for (uint32_t m = 1; m <= 3; m++) { 7814 GemmMicrokernelTester() 7815 .mr(3) 7816 .nr(32) 7817 .kr(4) 7818 .sr(1) 7819 .m(m) 7820 .n(n) 7821 .k(8) 7822 .iterations(1) 7823 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7824 } 7825 } 7826 } 7827 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,k_eq_8_subtile_m)7828 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8_subtile_m) { 7829 TEST_REQUIRES_ARM_NEON_DOT; 7830 for (uint32_t m = 1; m <= 3; m++) { 7831 GemmMicrokernelTester() 7832 .mr(3) 7833 .nr(32) 7834 .kr(4) 7835 .sr(1) 7836 .m(m) 7837 .n(32) 7838 .k(8) 7839 .iterations(1) 7840 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7841 } 7842 } 7843 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,k_eq_8_subtile_n)7844 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8_subtile_n) { 7845 TEST_REQUIRES_ARM_NEON_DOT; 7846 for (uint32_t n = 1; n <= 32; n++) { 7847 GemmMicrokernelTester() 7848 .mr(3) 7849 .nr(32) 
7850 .kr(4) 7851 .sr(1) 7852 .m(3) 7853 .n(n) 7854 .k(8) 7855 .iterations(1) 7856 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7857 } 7858 } 7859 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,k_lt_8)7860 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_lt_8) { 7861 TEST_REQUIRES_ARM_NEON_DOT; 7862 for (size_t k = 1; k < 8; k++) { 7863 GemmMicrokernelTester() 7864 .mr(3) 7865 .nr(32) 7866 .kr(4) 7867 .sr(1) 7868 .m(3) 7869 .n(32) 7870 .k(k) 7871 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7872 } 7873 } 7874 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,k_lt_8_subtile)7875 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_lt_8_subtile) { 7876 TEST_REQUIRES_ARM_NEON_DOT; 7877 for (size_t k = 1; k < 8; k++) { 7878 for (uint32_t n = 1; n <= 32; n++) { 7879 for (uint32_t m = 1; m <= 3; m++) { 7880 GemmMicrokernelTester() 7881 .mr(3) 7882 .nr(32) 7883 .kr(4) 7884 .sr(1) 7885 .m(m) 7886 .n(n) 7887 .k(k) 7888 .iterations(1) 7889 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7890 } 7891 } 7892 } 7893 } 7894 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,k_gt_8)7895 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_gt_8) { 7896 TEST_REQUIRES_ARM_NEON_DOT; 7897 for (size_t k = 9; k < 16; k++) { 7898 GemmMicrokernelTester() 7899 .mr(3) 7900 .nr(32) 7901 .kr(4) 7902 .sr(1) 7903 .m(3) 7904 .n(32) 7905 .k(k) 7906 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7907 } 7908 } 7909 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,k_gt_8_subtile)7910 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_gt_8_subtile) { 7911 TEST_REQUIRES_ARM_NEON_DOT; 7912 for (size_t k = 9; k < 16; k++) { 7913 for (uint32_t n = 1; n <= 32; n++) { 7914 for (uint32_t m = 1; m <= 3; m++) { 7915 
GemmMicrokernelTester() 7916 .mr(3) 7917 .nr(32) 7918 .kr(4) 7919 .sr(1) 7920 .m(m) 7921 .n(n) 7922 .k(k) 7923 .iterations(1) 7924 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7925 } 7926 } 7927 } 7928 } 7929 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,k_div_8)7930 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_div_8) { 7931 TEST_REQUIRES_ARM_NEON_DOT; 7932 for (size_t k = 16; k <= 80; k += 8) { 7933 GemmMicrokernelTester() 7934 .mr(3) 7935 .nr(32) 7936 .kr(4) 7937 .sr(1) 7938 .m(3) 7939 .n(32) 7940 .k(k) 7941 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7942 } 7943 } 7944 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,k_div_8_subtile)7945 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_div_8_subtile) { 7946 TEST_REQUIRES_ARM_NEON_DOT; 7947 for (size_t k = 16; k <= 80; k += 8) { 7948 for (uint32_t n = 1; n <= 32; n++) { 7949 for (uint32_t m = 1; m <= 3; m++) { 7950 GemmMicrokernelTester() 7951 .mr(3) 7952 .nr(32) 7953 .kr(4) 7954 .sr(1) 7955 .m(m) 7956 .n(n) 7957 .k(k) 7958 .iterations(1) 7959 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7960 } 7961 } 7962 } 7963 } 7964 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,n_gt_32)7965 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32) { 7966 TEST_REQUIRES_ARM_NEON_DOT; 7967 for (uint32_t n = 33; n < 64; n++) { 7968 for (size_t k = 1; k <= 40; k += 9) { 7969 GemmMicrokernelTester() 7970 .mr(3) 7971 .nr(32) 7972 .kr(4) 7973 .sr(1) 7974 .m(3) 7975 .n(n) 7976 .k(k) 7977 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7978 } 7979 } 7980 } 7981 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,n_gt_32_strided_cn)7982 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32_strided_cn) { 7983 
TEST_REQUIRES_ARM_NEON_DOT; 7984 for (uint32_t n = 33; n < 64; n++) { 7985 for (size_t k = 1; k <= 40; k += 9) { 7986 GemmMicrokernelTester() 7987 .mr(3) 7988 .nr(32) 7989 .kr(4) 7990 .sr(1) 7991 .m(3) 7992 .n(n) 7993 .k(k) 7994 .cn_stride(37) 7995 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7996 } 7997 } 7998 } 7999 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,n_gt_32_subtile)8000 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32_subtile) { 8001 TEST_REQUIRES_ARM_NEON_DOT; 8002 for (uint32_t n = 33; n < 64; n++) { 8003 for (size_t k = 1; k <= 40; k += 9) { 8004 for (uint32_t m = 1; m <= 3; m++) { 8005 GemmMicrokernelTester() 8006 .mr(3) 8007 .nr(32) 8008 .kr(4) 8009 .sr(1) 8010 .m(m) 8011 .n(n) 8012 .k(k) 8013 .iterations(1) 8014 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8015 } 8016 } 8017 } 8018 } 8019 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,n_div_32)8020 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32) { 8021 TEST_REQUIRES_ARM_NEON_DOT; 8022 for (uint32_t n = 64; n <= 96; n += 32) { 8023 for (size_t k = 1; k <= 40; k += 9) { 8024 GemmMicrokernelTester() 8025 .mr(3) 8026 .nr(32) 8027 .kr(4) 8028 .sr(1) 8029 .m(3) 8030 .n(n) 8031 .k(k) 8032 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8033 } 8034 } 8035 } 8036 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,n_div_32_strided_cn)8037 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32_strided_cn) { 8038 TEST_REQUIRES_ARM_NEON_DOT; 8039 for (uint32_t n = 64; n <= 96; n += 32) { 8040 for (size_t k = 1; k <= 40; k += 9) { 8041 GemmMicrokernelTester() 8042 .mr(3) 8043 .nr(32) 8044 .kr(4) 8045 .sr(1) 8046 .m(3) 8047 .n(n) 8048 .k(k) 8049 .cn_stride(37) 8050 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 8051 } 8052 } 8053 } 8054 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,n_div_32_subtile)8055 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32_subtile) { 8056 TEST_REQUIRES_ARM_NEON_DOT; 8057 for (uint32_t n = 64; n <= 96; n += 32) { 8058 for (size_t k = 1; k <= 40; k += 9) { 8059 for (uint32_t m = 1; m <= 3; m++) { 8060 GemmMicrokernelTester() 8061 .mr(3) 8062 .nr(32) 8063 .kr(4) 8064 .sr(1) 8065 .m(m) 8066 .n(n) 8067 .k(k) 8068 .iterations(1) 8069 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8070 } 8071 } 8072 } 8073 } 8074 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,small_kernel)8075 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, small_kernel) { 8076 TEST_REQUIRES_ARM_NEON_DOT; 8077 for (size_t k = 1; k <= 40; k += 9) { 8078 GemmMicrokernelTester() 8079 .mr(3) 8080 .nr(32) 8081 .kr(4) 8082 .sr(1) 8083 .m(3) 8084 .n(32) 8085 .k(k) 8086 .ks(3) 8087 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8088 } 8089 } 8090 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,small_kernel_subtile)8091 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, small_kernel_subtile) { 8092 TEST_REQUIRES_ARM_NEON_DOT; 8093 for (size_t k = 1; k <= 40; k += 9) { 8094 for (uint32_t n = 1; n <= 32; n++) { 8095 for (uint32_t m = 1; m <= 3; m++) { 8096 GemmMicrokernelTester() 8097 .mr(3) 8098 .nr(32) 8099 .kr(4) 8100 .sr(1) 8101 .m(m) 8102 .n(n) 8103 .k(k) 8104 .ks(3) 8105 .iterations(1) 8106 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8107 } 8108 } 8109 } 8110 } 8111 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,n_gt_32_small_kernel)8112 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32_small_kernel) { 8113 TEST_REQUIRES_ARM_NEON_DOT; 8114 for (uint32_t n = 33; n < 64; n++) { 8115 for (size_t k = 1; k <= 40; k += 9) { 8116 
GemmMicrokernelTester() 8117 .mr(3) 8118 .nr(32) 8119 .kr(4) 8120 .sr(1) 8121 .m(3) 8122 .n(n) 8123 .k(k) 8124 .ks(3) 8125 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8126 } 8127 } 8128 } 8129 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,n_div_32_small_kernel)8130 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32_small_kernel) { 8131 TEST_REQUIRES_ARM_NEON_DOT; 8132 for (uint32_t n = 64; n <= 96; n += 32) { 8133 for (size_t k = 1; k <= 40; k += 9) { 8134 GemmMicrokernelTester() 8135 .mr(3) 8136 .nr(32) 8137 .kr(4) 8138 .sr(1) 8139 .m(3) 8140 .n(n) 8141 .k(k) 8142 .ks(3) 8143 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8144 } 8145 } 8146 } 8147 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,strided_cm_subtile)8148 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, strided_cm_subtile) { 8149 TEST_REQUIRES_ARM_NEON_DOT; 8150 for (size_t k = 1; k <= 40; k += 9) { 8151 for (uint32_t n = 1; n <= 32; n++) { 8152 for (uint32_t m = 1; m <= 3; m++) { 8153 GemmMicrokernelTester() 8154 .mr(3) 8155 .nr(32) 8156 .kr(4) 8157 .sr(1) 8158 .m(m) 8159 .n(n) 8160 .k(k) 8161 .cm_stride(37) 8162 .iterations(1) 8163 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8164 } 8165 } 8166 } 8167 } 8168 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,a_offset)8169 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, a_offset) { 8170 TEST_REQUIRES_ARM_NEON_DOT; 8171 for (size_t k = 1; k <= 40; k += 9) { 8172 GemmMicrokernelTester() 8173 .mr(3) 8174 .nr(32) 8175 .kr(4) 8176 .sr(1) 8177 .m(3) 8178 .n(32) 8179 .k(k) 8180 .ks(3) 8181 .a_offset(127) 8182 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8183 } 8184 } 8185 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,zero)8186 
TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, zero) { 8187 TEST_REQUIRES_ARM_NEON_DOT; 8188 for (size_t k = 1; k <= 40; k += 9) { 8189 for (uint32_t mz = 0; mz < 3; mz++) { 8190 GemmMicrokernelTester() 8191 .mr(3) 8192 .nr(32) 8193 .kr(4) 8194 .sr(1) 8195 .m(3) 8196 .n(32) 8197 .k(k) 8198 .ks(3) 8199 .a_offset(127) 8200 .zero_index(mz) 8201 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8202 } 8203 } 8204 } 8205 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,qmin)8206 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, qmin) { 8207 TEST_REQUIRES_ARM_NEON_DOT; 8208 GemmMicrokernelTester() 8209 .mr(3) 8210 .nr(32) 8211 .kr(4) 8212 .sr(1) 8213 .m(3) 8214 .n(32) 8215 .k(8) 8216 .qmin(128) 8217 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8218 } 8219 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,qmax)8220 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, qmax) { 8221 TEST_REQUIRES_ARM_NEON_DOT; 8222 GemmMicrokernelTester() 8223 .mr(3) 8224 .nr(32) 8225 .kr(4) 8226 .sr(1) 8227 .m(3) 8228 .n(32) 8229 .k(8) 8230 .qmax(128) 8231 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8232 } 8233 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,strided_cm)8234 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, strided_cm) { 8235 TEST_REQUIRES_ARM_NEON_DOT; 8236 GemmMicrokernelTester() 8237 .mr(3) 8238 .nr(32) 8239 .kr(4) 8240 .sr(1) 8241 .m(3) 8242 .n(32) 8243 .k(8) 8244 .cm_stride(37) 8245 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8246 } 8247 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,no_a_zero_point)8248 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, no_a_zero_point) { 8249 TEST_REQUIRES_ARM_NEON_DOT; 8250 for (size_t k = 1; k <= 40; k += 9) { 8251 GemmMicrokernelTester() 8252 
.mr(3) 8253 .nr(32) 8254 .kr(4) 8255 .sr(1) 8256 .m(3) 8257 .n(32) 8258 .k(k) 8259 .a_zero_point(0) 8260 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8261 } 8262 } 8263 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,no_b_zero_point)8264 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, no_b_zero_point) { 8265 TEST_REQUIRES_ARM_NEON_DOT; 8266 for (size_t k = 1; k <= 40; k += 9) { 8267 GemmMicrokernelTester() 8268 .mr(3) 8269 .nr(32) 8270 .kr(4) 8271 .sr(1) 8272 .m(3) 8273 .n(32) 8274 .k(k) 8275 .b_zero_point(0) 8276 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8277 } 8278 } 8279 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT,no_zero_point)8280 TEST(QU8_IGEMM_MINMAX_RNDNU_3X32C4__NEONDOT, no_zero_point) { 8281 TEST_REQUIRES_ARM_NEON_DOT; 8282 for (size_t k = 1; k <= 40; k += 9) { 8283 GemmMicrokernelTester() 8284 .mr(3) 8285 .nr(32) 8286 .kr(4) 8287 .sr(1) 8288 .m(3) 8289 .n(32) 8290 .k(k) 8291 .a_zero_point(0) 8292 .b_zero_point(0) 8293 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8294 } 8295 } 8296 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 8297 8298 8299 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_eq_8)8300 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8) { 8301 TEST_REQUIRES_ARM_NEON; 8302 GemmMicrokernelTester() 8303 .mr(4) 8304 .nr(8) 8305 .kr(1) 8306 .sr(1) 8307 .m(4) 8308 .n(8) 8309 .k(8) 8310 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8311 } 8312 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,strided_cn)8313 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cn) { 8314 TEST_REQUIRES_ARM_NEON; 8315 GemmMicrokernelTester() 8316 .mr(4) 
8317 .nr(8) 8318 .kr(1) 8319 .sr(1) 8320 .m(4) 8321 .n(8) 8322 .k(8) 8323 .cn_stride(11) 8324 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8325 } 8326 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_eq_8_subtile)8327 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile) { 8328 TEST_REQUIRES_ARM_NEON; 8329 for (uint32_t n = 1; n <= 8; n++) { 8330 for (uint32_t m = 1; m <= 4; m++) { 8331 GemmMicrokernelTester() 8332 .mr(4) 8333 .nr(8) 8334 .kr(1) 8335 .sr(1) 8336 .m(m) 8337 .n(n) 8338 .k(8) 8339 .iterations(1) 8340 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8341 } 8342 } 8343 } 8344 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_eq_8_subtile_m)8345 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) { 8346 TEST_REQUIRES_ARM_NEON; 8347 for (uint32_t m = 1; m <= 4; m++) { 8348 GemmMicrokernelTester() 8349 .mr(4) 8350 .nr(8) 8351 .kr(1) 8352 .sr(1) 8353 .m(m) 8354 .n(8) 8355 .k(8) 8356 .iterations(1) 8357 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8358 } 8359 } 8360 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_eq_8_subtile_n)8361 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) { 8362 TEST_REQUIRES_ARM_NEON; 8363 for (uint32_t n = 1; n <= 8; n++) { 8364 GemmMicrokernelTester() 8365 .mr(4) 8366 .nr(8) 8367 .kr(1) 8368 .sr(1) 8369 .m(4) 8370 .n(n) 8371 .k(8) 8372 .iterations(1) 8373 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8374 } 8375 } 8376 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_lt_8)8377 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8) { 8378 TEST_REQUIRES_ARM_NEON; 8379 for (size_t k = 1; k < 8; k++) { 8380 GemmMicrokernelTester() 8381 .mr(4) 
8382 .nr(8) 8383 .kr(1) 8384 .sr(1) 8385 .m(4) 8386 .n(8) 8387 .k(k) 8388 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8389 } 8390 } 8391 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_lt_8_subtile)8392 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8_subtile) { 8393 TEST_REQUIRES_ARM_NEON; 8394 for (size_t k = 1; k < 8; k++) { 8395 for (uint32_t n = 1; n <= 8; n++) { 8396 for (uint32_t m = 1; m <= 4; m++) { 8397 GemmMicrokernelTester() 8398 .mr(4) 8399 .nr(8) 8400 .kr(1) 8401 .sr(1) 8402 .m(m) 8403 .n(n) 8404 .k(k) 8405 .iterations(1) 8406 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8407 } 8408 } 8409 } 8410 } 8411 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_gt_8)8412 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8) { 8413 TEST_REQUIRES_ARM_NEON; 8414 for (size_t k = 9; k < 16; k++) { 8415 GemmMicrokernelTester() 8416 .mr(4) 8417 .nr(8) 8418 .kr(1) 8419 .sr(1) 8420 .m(4) 8421 .n(8) 8422 .k(k) 8423 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8424 } 8425 } 8426 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_gt_8_subtile)8427 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8_subtile) { 8428 TEST_REQUIRES_ARM_NEON; 8429 for (size_t k = 9; k < 16; k++) { 8430 for (uint32_t n = 1; n <= 8; n++) { 8431 for (uint32_t m = 1; m <= 4; m++) { 8432 GemmMicrokernelTester() 8433 .mr(4) 8434 .nr(8) 8435 .kr(1) 8436 .sr(1) 8437 .m(m) 8438 .n(n) 8439 .k(k) 8440 .iterations(1) 8441 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8442 } 8443 } 8444 } 8445 } 8446 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_div_8)8447 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8) { 8448 
TEST_REQUIRES_ARM_NEON; 8449 for (size_t k = 16; k <= 80; k += 8) { 8450 GemmMicrokernelTester() 8451 .mr(4) 8452 .nr(8) 8453 .kr(1) 8454 .sr(1) 8455 .m(4) 8456 .n(8) 8457 .k(k) 8458 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8459 } 8460 } 8461 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_div_8_subtile)8462 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8_subtile) { 8463 TEST_REQUIRES_ARM_NEON; 8464 for (size_t k = 16; k <= 80; k += 8) { 8465 for (uint32_t n = 1; n <= 8; n++) { 8466 for (uint32_t m = 1; m <= 4; m++) { 8467 GemmMicrokernelTester() 8468 .mr(4) 8469 .nr(8) 8470 .kr(1) 8471 .sr(1) 8472 .m(m) 8473 .n(n) 8474 .k(k) 8475 .iterations(1) 8476 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8477 } 8478 } 8479 } 8480 } 8481 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,n_gt_8)8482 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8) { 8483 TEST_REQUIRES_ARM_NEON; 8484 for (uint32_t n = 9; n < 16; n++) { 8485 for (size_t k = 1; k <= 40; k += 9) { 8486 GemmMicrokernelTester() 8487 .mr(4) 8488 .nr(8) 8489 .kr(1) 8490 .sr(1) 8491 .m(4) 8492 .n(n) 8493 .k(k) 8494 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8495 } 8496 } 8497 } 8498 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,n_gt_8_strided_cn)8499 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) { 8500 TEST_REQUIRES_ARM_NEON; 8501 for (uint32_t n = 9; n < 16; n++) { 8502 for (size_t k = 1; k <= 40; k += 9) { 8503 GemmMicrokernelTester() 8504 .mr(4) 8505 .nr(8) 8506 .kr(1) 8507 .sr(1) 8508 .m(4) 8509 .n(n) 8510 .k(k) 8511 .cn_stride(11) 8512 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8513 } 8514 } 8515 } 8516 
// QU8 IGEMM MINMAX RNDNU 4x8 NEON MLAL-lane cases n_gt_8_subtile through
// n_gt_8_small_kernel, restored to one statement per line (stray line-number
// tokens and duplicated TEST labels removed). Each case configures a
// GemmMicrokernelTester and calls Test() with the 4x8 ukernel,
// xnn_init_qu8_conv_minmax_rndnu_neon_params and xnn_qu8_requantize_rndnu.

// n in 9..15, k = 1, 10, ..., 37, every m in 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n in {16, 24}, k = 1, 10, ..., 37, m = 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in {16, 24} with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in {16, 24} over every m in 1..4; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full 4x8 tile with ks(3), k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks(3) over every m in 1..4 and n in 1..8; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n in 9..15, m = 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) { 8648 TEST_REQUIRES_ARM_NEON; 8649 for (uint32_t n = 16; n <= 24; n += 8) { 8650 for (size_t k = 1; k <= 40; k += 9) { 8651 GemmMicrokernelTester() 8652 .mr(4) 8653 .nr(8) 8654 .kr(1) 8655 .sr(1) 8656 .m(4) 8657 .n(n) 8658 .k(k) 8659 .ks(3) 8660 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8661 } 8662 } 8663 } 8664 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,strided_cm_subtile)8665 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm_subtile) { 8666 TEST_REQUIRES_ARM_NEON; 8667 for (size_t k = 1; k <= 40; k += 9) { 8668 for (uint32_t n = 1; n <= 8; n++) { 8669 for (uint32_t m = 1; m <= 4; m++) { 8670 GemmMicrokernelTester() 8671 .mr(4) 8672 .nr(8) 8673 .kr(1) 8674 .sr(1) 8675 .m(m) 8676 .n(n) 8677 .k(k) 8678 .cm_stride(11) 8679 .iterations(1) 8680 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8681 } 8682 } 8683 } 8684 } 8685 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,a_offset)8686 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, a_offset) { 8687 TEST_REQUIRES_ARM_NEON; 8688 for (size_t k = 1; k <= 40; k += 9) { 8689 GemmMicrokernelTester() 8690 .mr(4) 8691 .nr(8) 8692 .kr(1) 8693 .sr(1) 8694 .m(4) 8695 .n(8) 8696 .k(k) 8697 .ks(3) 8698 .a_offset(163) 8699 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8700 } 8701 } 8702 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,zero)8703 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, zero) { 8704 TEST_REQUIRES_ARM_NEON; 8705 for (size_t k = 1; k <= 40; k += 9) { 8706 for (uint32_t mz = 0; mz < 4; mz++) { 8707 GemmMicrokernelTester() 8708 .mr(4) 8709 .nr(8) 8710 .kr(1) 8711 .sr(1) 8712 .m(4) 8713 .n(8) 8714 .k(k) 8715 .ks(3) 8716 .a_offset(163) 8717 .zero_index(mz) 8718 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8719 } 8720 } 8721 } 8722 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,qmin)8723 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmin) { 8724 TEST_REQUIRES_ARM_NEON; 8725 GemmMicrokernelTester() 8726 .mr(4) 8727 .nr(8) 8728 .kr(1) 8729 .sr(1) 8730 .m(4) 8731 .n(8) 8732 .k(8) 8733 .qmin(128) 8734 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8735 } 8736 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,qmax)8737 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmax) { 8738 TEST_REQUIRES_ARM_NEON; 8739 GemmMicrokernelTester() 8740 .mr(4) 8741 .nr(8) 8742 .kr(1) 8743 .sr(1) 8744 .m(4) 8745 .n(8) 8746 .k(8) 8747 .qmax(128) 8748 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8749 } 8750 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,strided_cm)8751 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm) { 8752 TEST_REQUIRES_ARM_NEON; 8753 GemmMicrokernelTester() 8754 .mr(4) 8755 .nr(8) 8756 .kr(1) 8757 .sr(1) 8758 .m(4) 8759 .n(8) 8760 .k(8) 8761 .cm_stride(11) 8762 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8763 } 8764 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,no_a_zero_point)8765 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_a_zero_point) { 8766 TEST_REQUIRES_ARM_NEON; 8767 for (size_t k = 1; k <= 40; k += 9) { 8768 GemmMicrokernelTester() 8769 .mr(4) 8770 .nr(8) 8771 .kr(1) 8772 .sr(1) 8773 .m(4) 8774 .n(8) 8775 .k(k) 8776 .a_zero_point(0) 8777 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8778 } 8779 } 8780 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,no_b_zero_point)8781 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_b_zero_point) { 8782 TEST_REQUIRES_ARM_NEON; 8783 for (size_t k = 1; k <= 40; k += 9) { 8784 GemmMicrokernelTester() 8785 .mr(4) 8786 .nr(8) 8787 .kr(1) 8788 .sr(1) 8789 .m(4) 8790 .n(8) 8791 .k(k) 8792 .b_zero_point(0) 8793 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8794 } 8795 } 8796 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,no_zero_point)8797 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_zero_point) { 8798 TEST_REQUIRES_ARM_NEON; 8799 for (size_t k = 1; k <= 40; k += 9) { 8800 GemmMicrokernelTester() 8801 .mr(4) 8802 .nr(8) 8803 .kr(1) 8804 .sr(1) 8805 .m(4) 8806 .n(8) 8807 .k(k) 8808 .a_zero_point(0) 8809 .b_zero_point(0) 8810 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8811 } 8812 } 8813 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 8814 8815 8816 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_eq_8)8817 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8) { 8818 TEST_REQUIRES_ARM_NEON_DOT; 8819 GemmMicrokernelTester() 8820 .mr(4) 8821 .nr(16) 8822 .kr(4) 8823 .sr(1) 8824 .m(4) 8825 .n(16) 8826 .k(8) 8827 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8828 } 8829 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,strided_cn)8830 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cn) { 8831 TEST_REQUIRES_ARM_NEON_DOT; 8832 GemmMicrokernelTester() 8833 .mr(4) 8834 .nr(16) 8835 .kr(4) 8836 .sr(1) 8837 .m(4) 8838 .n(16) 8839 .k(8) 8840 .cn_stride(19) 8841 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8842 } 8843 
// k = 8 over every m = 1..4 and n = 1..16; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// k = 8, n = 16, varying only m = 1..4; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(m).n(16).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 8, m = 4, varying only n = 1..16; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Full 4x16 tile with k = 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 1..7 over every m = 1..4 and n = 1..16; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full 4x16 tile with k = 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 9..15 over every m = 1..4 and n = 1..16; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full 4x16 tile with k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 over every m = 1..4 and n = 1..16; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4 with n = 17..31 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Sweeps n = 17..31, k = 1, 10, 19, 28, 37 and m = 1..4; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4 with n = 32, 48 and k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Sweeps n = 32, 48, k = 1, 10, 19, 28, 37 and m = 1..4; one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,small_kernel)9109 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, small_kernel) { 9110 TEST_REQUIRES_ARM_NEON_DOT; 9111 for (size_t k = 1; k <= 40; k += 9) { 9112 GemmMicrokernelTester() 9113 .mr(4) 9114 .nr(16) 9115 .kr(4) 9116 .sr(1) 9117 .m(4) 9118 .n(16) 9119 .k(k) 9120 .ks(3) 9121 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9122 } 9123 } 9124 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,small_kernel_subtile)9125 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, small_kernel_subtile) { 9126 TEST_REQUIRES_ARM_NEON_DOT; 9127 for (size_t k = 1; k <= 40; k += 9) { 9128 for (uint32_t n = 1; n <= 16; n++) { 9129 for (uint32_t m = 1; m <= 4; m++) { 9130 GemmMicrokernelTester() 9131 .mr(4) 9132 .nr(16) 9133 .kr(4) 9134 .sr(1) 9135 .m(m) 9136 .n(n) 9137 .k(k) 9138 .ks(3) 9139 .iterations(1) 9140 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9141 } 9142 } 9143 } 9144 } 9145 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,n_gt_16_small_kernel)9146 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_small_kernel) { 9147 TEST_REQUIRES_ARM_NEON_DOT; 9148 for (uint32_t n = 17; n < 32; n++) { 9149 for (size_t k = 1; k <= 40; k += 9) { 9150 GemmMicrokernelTester() 9151 .mr(4) 9152 .nr(16) 9153 .kr(4) 9154 .sr(1) 9155 .m(4) 9156 .n(n) 9157 .k(k) 9158 .ks(3) 9159 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9160 } 9161 } 9162 } 9163 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,n_div_16_small_kernel)9164 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_small_kernel) { 9165 TEST_REQUIRES_ARM_NEON_DOT; 9166 for (uint32_t n = 32; n <= 48; n += 16) { 9167 for (size_t k = 1; k <= 40; k += 9) { 9168 GemmMicrokernelTester() 9169 .mr(4) 9170 .nr(16) 9171 .kr(4) 9172 .sr(1) 9173 .m(4) 9174 .n(n) 9175 .k(k) 
9176 .ks(3) 9177 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9178 } 9179 } 9180 } 9181 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,strided_cm_subtile)9182 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm_subtile) { 9183 TEST_REQUIRES_ARM_NEON_DOT; 9184 for (size_t k = 1; k <= 40; k += 9) { 9185 for (uint32_t n = 1; n <= 16; n++) { 9186 for (uint32_t m = 1; m <= 4; m++) { 9187 GemmMicrokernelTester() 9188 .mr(4) 9189 .nr(16) 9190 .kr(4) 9191 .sr(1) 9192 .m(m) 9193 .n(n) 9194 .k(k) 9195 .cm_stride(19) 9196 .iterations(1) 9197 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9198 } 9199 } 9200 } 9201 } 9202 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,a_offset)9203 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, a_offset) { 9204 TEST_REQUIRES_ARM_NEON_DOT; 9205 for (size_t k = 1; k <= 40; k += 9) { 9206 GemmMicrokernelTester() 9207 .mr(4) 9208 .nr(16) 9209 .kr(4) 9210 .sr(1) 9211 .m(4) 9212 .n(16) 9213 .k(k) 9214 .ks(3) 9215 .a_offset(163) 9216 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9217 } 9218 } 9219 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,zero)9220 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, zero) { 9221 TEST_REQUIRES_ARM_NEON_DOT; 9222 for (size_t k = 1; k <= 40; k += 9) { 9223 for (uint32_t mz = 0; mz < 4; mz++) { 9224 GemmMicrokernelTester() 9225 .mr(4) 9226 .nr(16) 9227 .kr(4) 9228 .sr(1) 9229 .m(4) 9230 .n(16) 9231 .k(k) 9232 .ks(3) 9233 .a_offset(163) 9234 .zero_index(mz) 9235 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9236 } 9237 } 9238 } 9239 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,qmin)9240 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmin) { 9241 TEST_REQUIRES_ARM_NEON_DOT; 9242 GemmMicrokernelTester() 9243 
.mr(4) 9244 .nr(16) 9245 .kr(4) 9246 .sr(1) 9247 .m(4) 9248 .n(16) 9249 .k(8) 9250 .qmin(128) 9251 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9252 } 9253 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,qmax)9254 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmax) { 9255 TEST_REQUIRES_ARM_NEON_DOT; 9256 GemmMicrokernelTester() 9257 .mr(4) 9258 .nr(16) 9259 .kr(4) 9260 .sr(1) 9261 .m(4) 9262 .n(16) 9263 .k(8) 9264 .qmax(128) 9265 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9266 } 9267 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,strided_cm)9268 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm) { 9269 TEST_REQUIRES_ARM_NEON_DOT; 9270 GemmMicrokernelTester() 9271 .mr(4) 9272 .nr(16) 9273 .kr(4) 9274 .sr(1) 9275 .m(4) 9276 .n(16) 9277 .k(8) 9278 .cm_stride(19) 9279 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9280 } 9281 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,no_a_zero_point)9282 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_a_zero_point) { 9283 TEST_REQUIRES_ARM_NEON_DOT; 9284 for (size_t k = 1; k <= 40; k += 9) { 9285 GemmMicrokernelTester() 9286 .mr(4) 9287 .nr(16) 9288 .kr(4) 9289 .sr(1) 9290 .m(4) 9291 .n(16) 9292 .k(k) 9293 .a_zero_point(0) 9294 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9295 } 9296 } 9297 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,no_b_zero_point)9298 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_b_zero_point) { 9299 TEST_REQUIRES_ARM_NEON_DOT; 9300 for (size_t k = 1; k <= 40; k += 9) { 9301 GemmMicrokernelTester() 9302 .mr(4) 9303 .nr(16) 9304 .kr(4) 9305 .sr(1) 9306 .m(4) 9307 .n(16) 9308 .k(k) 9309 .b_zero_point(0) 9310 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9311 } 9312 } 9313 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,no_zero_point)9314 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_zero_point) { 9315 TEST_REQUIRES_ARM_NEON_DOT; 9316 for (size_t k = 1; k <= 40; k += 9) { 9317 GemmMicrokernelTester() 9318 .mr(4) 9319 .nr(16) 9320 .kr(4) 9321 .sr(1) 9322 .m(4) 9323 .n(16) 9324 .k(k) 9325 .a_zero_point(0) 9326 .b_zero_point(0) 9327 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9328 } 9329 } 9330 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 9331 9332 9333 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,k_eq_8)9334 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8) { 9335 TEST_REQUIRES_ARM_NEON_DOT; 9336 GemmMicrokernelTester() 9337 .mr(5) 9338 .nr(16) 9339 .kr(4) 9340 .sr(1) 9341 .m(5) 9342 .n(16) 9343 .k(8) 9344 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9345 } 9346 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,strided_cn)9347 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, strided_cn) { 9348 TEST_REQUIRES_ARM_NEON_DOT; 9349 GemmMicrokernelTester() 9350 .mr(5) 9351 .nr(16) 9352 .kr(4) 9353 .sr(1) 9354 .m(5) 9355 .n(16) 9356 .k(8) 9357 .cn_stride(19) 9358 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9359 } 9360 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,k_eq_8_subtile)9361 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8_subtile) { 9362 TEST_REQUIRES_ARM_NEON_DOT; 9363 for (uint32_t n = 1; n <= 16; n++) { 9364 for (uint32_t m = 1; m <= 5; m++) { 9365 GemmMicrokernelTester() 9366 .mr(5) 9367 .nr(16) 9368 .kr(4) 9369 .sr(1) 9370 .m(m) 9371 .n(n) 9372 .k(8) 9373 .iterations(1) 9374 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9375 } 9376 } 9377 } 9378 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,k_eq_8_subtile_m)9379 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8_subtile_m) { 9380 TEST_REQUIRES_ARM_NEON_DOT; 9381 for (uint32_t m = 1; m <= 5; m++) { 9382 GemmMicrokernelTester() 9383 .mr(5) 9384 .nr(16) 9385 .kr(4) 9386 .sr(1) 9387 .m(m) 9388 .n(16) 9389 .k(8) 9390 .iterations(1) 9391 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9392 } 9393 } 9394 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,k_eq_8_subtile_n)9395 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8_subtile_n) { 9396 TEST_REQUIRES_ARM_NEON_DOT; 9397 for (uint32_t n = 1; n <= 16; n++) { 9398 GemmMicrokernelTester() 9399 .mr(5) 9400 .nr(16) 9401 .kr(4) 9402 .sr(1) 9403 .m(5) 9404 .n(n) 9405 .k(8) 9406 .iterations(1) 9407 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9408 } 9409 } 9410 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,k_lt_8)9411 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_lt_8) { 9412 TEST_REQUIRES_ARM_NEON_DOT; 9413 for (size_t k = 1; k < 8; k++) { 9414 GemmMicrokernelTester() 9415 .mr(5) 9416 .nr(16) 9417 .kr(4) 9418 .sr(1) 9419 .m(5) 9420 .n(16) 9421 .k(k) 9422 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9423 } 9424 } 9425 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,k_lt_8_subtile)9426 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_lt_8_subtile) { 9427 TEST_REQUIRES_ARM_NEON_DOT; 9428 for (size_t k = 1; k < 8; k++) { 9429 for (uint32_t n = 1; n <= 16; n++) { 9430 for (uint32_t m = 1; m <= 5; m++) { 9431 GemmMicrokernelTester() 9432 .mr(5) 9433 .nr(16) 9434 .kr(4) 9435 .sr(1) 9436 .m(m) 9437 .n(n) 9438 .k(k) 9439 .iterations(1) 9440 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9441 } 9442 } 9443 } 9444 } 9445 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,k_gt_8)9446 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_gt_8) { 9447 TEST_REQUIRES_ARM_NEON_DOT; 9448 for (size_t k = 9; k < 16; k++) { 9449 GemmMicrokernelTester() 9450 .mr(5) 9451 .nr(16) 9452 .kr(4) 9453 .sr(1) 9454 .m(5) 9455 .n(16) 9456 .k(k) 9457 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9458 } 9459 } 9460 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,k_gt_8_subtile)9461 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_gt_8_subtile) { 9462 TEST_REQUIRES_ARM_NEON_DOT; 9463 for (size_t k = 9; k < 16; k++) { 9464 for (uint32_t n = 1; n <= 16; n++) { 9465 for (uint32_t m = 1; m <= 5; m++) { 9466 GemmMicrokernelTester() 9467 .mr(5) 9468 .nr(16) 9469 .kr(4) 9470 .sr(1) 9471 .m(m) 9472 .n(n) 9473 .k(k) 9474 .iterations(1) 9475 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9476 } 9477 } 9478 } 9479 } 9480 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,k_div_8)9481 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_div_8) { 9482 TEST_REQUIRES_ARM_NEON_DOT; 9483 for (size_t k = 16; k <= 80; k += 8) { 9484 GemmMicrokernelTester() 9485 .mr(5) 9486 .nr(16) 9487 .kr(4) 9488 .sr(1) 9489 .m(5) 9490 .n(16) 9491 .k(k) 9492 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9493 } 9494 } 9495 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,k_div_8_subtile)9496 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_div_8_subtile) { 9497 TEST_REQUIRES_ARM_NEON_DOT; 9498 for (size_t k = 16; k <= 80; k += 8) { 9499 for (uint32_t n = 1; n <= 16; n++) { 9500 for (uint32_t m = 1; m <= 5; m++) { 9501 GemmMicrokernelTester() 9502 .mr(5) 9503 .nr(16) 9504 .kr(4) 
9505 .sr(1) 9506 .m(m) 9507 .n(n) 9508 .k(k) 9509 .iterations(1) 9510 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9511 } 9512 } 9513 } 9514 } 9515 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,n_gt_16)9516 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16) { 9517 TEST_REQUIRES_ARM_NEON_DOT; 9518 for (uint32_t n = 17; n < 32; n++) { 9519 for (size_t k = 1; k <= 40; k += 9) { 9520 GemmMicrokernelTester() 9521 .mr(5) 9522 .nr(16) 9523 .kr(4) 9524 .sr(1) 9525 .m(5) 9526 .n(n) 9527 .k(k) 9528 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9529 } 9530 } 9531 } 9532 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,n_gt_16_strided_cn)9533 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16_strided_cn) { 9534 TEST_REQUIRES_ARM_NEON_DOT; 9535 for (uint32_t n = 17; n < 32; n++) { 9536 for (size_t k = 1; k <= 40; k += 9) { 9537 GemmMicrokernelTester() 9538 .mr(5) 9539 .nr(16) 9540 .kr(4) 9541 .sr(1) 9542 .m(5) 9543 .n(n) 9544 .k(k) 9545 .cn_stride(19) 9546 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9547 } 9548 } 9549 } 9550 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,n_gt_16_subtile)9551 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16_subtile) { 9552 TEST_REQUIRES_ARM_NEON_DOT; 9553 for (uint32_t n = 17; n < 32; n++) { 9554 for (size_t k = 1; k <= 40; k += 9) { 9555 for (uint32_t m = 1; m <= 5; m++) { 9556 GemmMicrokernelTester() 9557 .mr(5) 9558 .nr(16) 9559 .kr(4) 9560 .sr(1) 9561 .m(m) 9562 .n(n) 9563 .k(k) 9564 .iterations(1) 9565 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9566 } 9567 } 9568 } 9569 } 9570 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,n_div_16)9571 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16) { 9572 
TEST_REQUIRES_ARM_NEON_DOT; 9573 for (uint32_t n = 32; n <= 48; n += 16) { 9574 for (size_t k = 1; k <= 40; k += 9) { 9575 GemmMicrokernelTester() 9576 .mr(5) 9577 .nr(16) 9578 .kr(4) 9579 .sr(1) 9580 .m(5) 9581 .n(n) 9582 .k(k) 9583 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9584 } 9585 } 9586 } 9587 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,n_div_16_strided_cn)9588 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16_strided_cn) { 9589 TEST_REQUIRES_ARM_NEON_DOT; 9590 for (uint32_t n = 32; n <= 48; n += 16) { 9591 for (size_t k = 1; k <= 40; k += 9) { 9592 GemmMicrokernelTester() 9593 .mr(5) 9594 .nr(16) 9595 .kr(4) 9596 .sr(1) 9597 .m(5) 9598 .n(n) 9599 .k(k) 9600 .cn_stride(19) 9601 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9602 } 9603 } 9604 } 9605 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,n_div_16_subtile)9606 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16_subtile) { 9607 TEST_REQUIRES_ARM_NEON_DOT; 9608 for (uint32_t n = 32; n <= 48; n += 16) { 9609 for (size_t k = 1; k <= 40; k += 9) { 9610 for (uint32_t m = 1; m <= 5; m++) { 9611 GemmMicrokernelTester() 9612 .mr(5) 9613 .nr(16) 9614 .kr(4) 9615 .sr(1) 9616 .m(m) 9617 .n(n) 9618 .k(k) 9619 .iterations(1) 9620 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9621 } 9622 } 9623 } 9624 } 9625 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,small_kernel)9626 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, small_kernel) { 9627 TEST_REQUIRES_ARM_NEON_DOT; 9628 for (size_t k = 1; k <= 40; k += 9) { 9629 GemmMicrokernelTester() 9630 .mr(5) 9631 .nr(16) 9632 .kr(4) 9633 .sr(1) 9634 .m(5) 9635 .n(16) 9636 .k(k) 9637 .ks(3) 9638 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9639 } 
9640 } 9641 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,small_kernel_subtile)9642 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, small_kernel_subtile) { 9643 TEST_REQUIRES_ARM_NEON_DOT; 9644 for (size_t k = 1; k <= 40; k += 9) { 9645 for (uint32_t n = 1; n <= 16; n++) { 9646 for (uint32_t m = 1; m <= 5; m++) { 9647 GemmMicrokernelTester() 9648 .mr(5) 9649 .nr(16) 9650 .kr(4) 9651 .sr(1) 9652 .m(m) 9653 .n(n) 9654 .k(k) 9655 .ks(3) 9656 .iterations(1) 9657 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9658 } 9659 } 9660 } 9661 } 9662 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,n_gt_16_small_kernel)9663 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16_small_kernel) { 9664 TEST_REQUIRES_ARM_NEON_DOT; 9665 for (uint32_t n = 17; n < 32; n++) { 9666 for (size_t k = 1; k <= 40; k += 9) { 9667 GemmMicrokernelTester() 9668 .mr(5) 9669 .nr(16) 9670 .kr(4) 9671 .sr(1) 9672 .m(5) 9673 .n(n) 9674 .k(k) 9675 .ks(3) 9676 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9677 } 9678 } 9679 } 9680 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,n_div_16_small_kernel)9681 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16_small_kernel) { 9682 TEST_REQUIRES_ARM_NEON_DOT; 9683 for (uint32_t n = 32; n <= 48; n += 16) { 9684 for (size_t k = 1; k <= 40; k += 9) { 9685 GemmMicrokernelTester() 9686 .mr(5) 9687 .nr(16) 9688 .kr(4) 9689 .sr(1) 9690 .m(5) 9691 .n(n) 9692 .k(k) 9693 .ks(3) 9694 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9695 } 9696 } 9697 } 9698 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,strided_cm_subtile)9699 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, strided_cm_subtile) { 9700 TEST_REQUIRES_ARM_NEON_DOT; 9701 for (size_t k = 1; k <= 40; k += 9) { 9702 for (uint32_t n = 1; n <= 16; n++) { 9703 for (uint32_t m = 1; m <= 5; m++) 
{ 9704 GemmMicrokernelTester() 9705 .mr(5) 9706 .nr(16) 9707 .kr(4) 9708 .sr(1) 9709 .m(m) 9710 .n(n) 9711 .k(k) 9712 .cm_stride(19) 9713 .iterations(1) 9714 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9715 } 9716 } 9717 } 9718 } 9719 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,a_offset)9720 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, a_offset) { 9721 TEST_REQUIRES_ARM_NEON_DOT; 9722 for (size_t k = 1; k <= 40; k += 9) { 9723 GemmMicrokernelTester() 9724 .mr(5) 9725 .nr(16) 9726 .kr(4) 9727 .sr(1) 9728 .m(5) 9729 .n(16) 9730 .k(k) 9731 .ks(3) 9732 .a_offset(211) 9733 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9734 } 9735 } 9736 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,zero)9737 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, zero) { 9738 TEST_REQUIRES_ARM_NEON_DOT; 9739 for (size_t k = 1; k <= 40; k += 9) { 9740 for (uint32_t mz = 0; mz < 5; mz++) { 9741 GemmMicrokernelTester() 9742 .mr(5) 9743 .nr(16) 9744 .kr(4) 9745 .sr(1) 9746 .m(5) 9747 .n(16) 9748 .k(k) 9749 .ks(3) 9750 .a_offset(211) 9751 .zero_index(mz) 9752 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9753 } 9754 } 9755 } 9756 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,qmin)9757 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, qmin) { 9758 TEST_REQUIRES_ARM_NEON_DOT; 9759 GemmMicrokernelTester() 9760 .mr(5) 9761 .nr(16) 9762 .kr(4) 9763 .sr(1) 9764 .m(5) 9765 .n(16) 9766 .k(8) 9767 .qmin(128) 9768 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9769 } 9770 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,qmax)9771 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, qmax) { 9772 TEST_REQUIRES_ARM_NEON_DOT; 9773 GemmMicrokernelTester() 9774 .mr(5) 9775 .nr(16) 9776 .kr(4) 9777 .sr(1) 9778 
.m(5) 9779 .n(16) 9780 .k(8) 9781 .qmax(128) 9782 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9783 } 9784 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,strided_cm)9785 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, strided_cm) { 9786 TEST_REQUIRES_ARM_NEON_DOT; 9787 GemmMicrokernelTester() 9788 .mr(5) 9789 .nr(16) 9790 .kr(4) 9791 .sr(1) 9792 .m(5) 9793 .n(16) 9794 .k(8) 9795 .cm_stride(19) 9796 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9797 } 9798 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,no_a_zero_point)9799 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, no_a_zero_point) { 9800 TEST_REQUIRES_ARM_NEON_DOT; 9801 for (size_t k = 1; k <= 40; k += 9) { 9802 GemmMicrokernelTester() 9803 .mr(5) 9804 .nr(16) 9805 .kr(4) 9806 .sr(1) 9807 .m(5) 9808 .n(16) 9809 .k(k) 9810 .a_zero_point(0) 9811 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9812 } 9813 } 9814 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,no_b_zero_point)9815 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, no_b_zero_point) { 9816 TEST_REQUIRES_ARM_NEON_DOT; 9817 for (size_t k = 1; k <= 40; k += 9) { 9818 GemmMicrokernelTester() 9819 .mr(5) 9820 .nr(16) 9821 .kr(4) 9822 .sr(1) 9823 .m(5) 9824 .n(16) 9825 .k(k) 9826 .b_zero_point(0) 9827 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9828 } 9829 } 9830 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT,no_zero_point)9831 TEST(QU8_IGEMM_MINMAX_RNDNU_5X16C4__NEONDOT, no_zero_point) { 9832 TEST_REQUIRES_ARM_NEON_DOT; 9833 for (size_t k = 1; k <= 40; k += 9) { 9834 GemmMicrokernelTester() 9835 .mr(5) 9836 .nr(16) 9837 .kr(4) 9838 .sr(1) 9839 .m(5) 9840 .n(16) 9841 .k(k) 9842 .a_zero_point(0) 9843 .b_zero_point(0) 9844 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9845 } 9846 } 9847 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 9848 9849 9850 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,k_eq_8)9851 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8) { 9852 TEST_REQUIRES_ARM_NEON_DOT; 9853 GemmMicrokernelTester() 9854 .mr(6) 9855 .nr(8) 9856 .kr(4) 9857 .sr(1) 9858 .m(6) 9859 .n(8) 9860 .k(8) 9861 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9862 } 9863 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,strided_cn)9864 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cn) { 9865 TEST_REQUIRES_ARM_NEON_DOT; 9866 GemmMicrokernelTester() 9867 .mr(6) 9868 .nr(8) 9869 .kr(4) 9870 .sr(1) 9871 .m(6) 9872 .n(8) 9873 .k(8) 9874 .cn_stride(11) 9875 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9876 } 9877 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,k_eq_8_subtile)9878 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile) { 9879 TEST_REQUIRES_ARM_NEON_DOT; 9880 for (uint32_t n = 1; n <= 8; n++) { 9881 for (uint32_t m = 1; m <= 6; m++) { 9882 GemmMicrokernelTester() 9883 .mr(6) 9884 .nr(8) 9885 .kr(4) 9886 .sr(1) 9887 .m(m) 9888 .n(n) 9889 .k(8) 9890 .iterations(1) 9891 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9892 } 9893 } 9894 } 9895 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,k_eq_8_subtile_m)9896 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_m) { 9897 TEST_REQUIRES_ARM_NEON_DOT; 9898 for (uint32_t m = 1; m <= 6; m++) { 9899 GemmMicrokernelTester() 9900 .mr(6) 9901 .nr(8) 9902 .kr(4) 9903 .sr(1) 9904 .m(m) 9905 .n(8) 9906 
.k(8) 9907 .iterations(1) 9908 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9909 } 9910 } 9911 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,k_eq_8_subtile_n)9912 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_n) { 9913 TEST_REQUIRES_ARM_NEON_DOT; 9914 for (uint32_t n = 1; n <= 8; n++) { 9915 GemmMicrokernelTester() 9916 .mr(6) 9917 .nr(8) 9918 .kr(4) 9919 .sr(1) 9920 .m(6) 9921 .n(n) 9922 .k(8) 9923 .iterations(1) 9924 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9925 } 9926 } 9927 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,k_lt_8)9928 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8) { 9929 TEST_REQUIRES_ARM_NEON_DOT; 9930 for (size_t k = 1; k < 8; k++) { 9931 GemmMicrokernelTester() 9932 .mr(6) 9933 .nr(8) 9934 .kr(4) 9935 .sr(1) 9936 .m(6) 9937 .n(8) 9938 .k(k) 9939 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9940 } 9941 } 9942 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,k_lt_8_subtile)9943 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8_subtile) { 9944 TEST_REQUIRES_ARM_NEON_DOT; 9945 for (size_t k = 1; k < 8; k++) { 9946 for (uint32_t n = 1; n <= 8; n++) { 9947 for (uint32_t m = 1; m <= 6; m++) { 9948 GemmMicrokernelTester() 9949 .mr(6) 9950 .nr(8) 9951 .kr(4) 9952 .sr(1) 9953 .m(m) 9954 .n(n) 9955 .k(k) 9956 .iterations(1) 9957 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9958 } 9959 } 9960 } 9961 } 9962 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,k_gt_8)9963 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8) { 9964 TEST_REQUIRES_ARM_NEON_DOT; 9965 for (size_t k = 9; k < 16; k++) { 9966 GemmMicrokernelTester() 9967 .mr(6) 9968 .nr(8) 9969 .kr(4) 9970 .sr(1) 9971 .m(6) 9972 .n(8) 9973 .k(k) 9974 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9975 } 9976 } 9977 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,k_gt_8_subtile)9978 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8_subtile) { 9979 TEST_REQUIRES_ARM_NEON_DOT; 9980 for (size_t k = 9; k < 16; k++) { 9981 for (uint32_t n = 1; n <= 8; n++) { 9982 for (uint32_t m = 1; m <= 6; m++) { 9983 GemmMicrokernelTester() 9984 .mr(6) 9985 .nr(8) 9986 .kr(4) 9987 .sr(1) 9988 .m(m) 9989 .n(n) 9990 .k(k) 9991 .iterations(1) 9992 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9993 } 9994 } 9995 } 9996 } 9997 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,k_div_8)9998 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8) { 9999 TEST_REQUIRES_ARM_NEON_DOT; 10000 for (size_t k = 16; k <= 80; k += 8) { 10001 GemmMicrokernelTester() 10002 .mr(6) 10003 .nr(8) 10004 .kr(4) 10005 .sr(1) 10006 .m(6) 10007 .n(8) 10008 .k(k) 10009 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10010 } 10011 } 10012 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,k_div_8_subtile)10013 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8_subtile) { 10014 TEST_REQUIRES_ARM_NEON_DOT; 10015 for (size_t k = 16; k <= 80; k += 8) { 10016 for (uint32_t n = 1; n <= 8; n++) { 10017 for (uint32_t m = 1; m <= 6; m++) { 10018 GemmMicrokernelTester() 10019 .mr(6) 10020 .nr(8) 10021 .kr(4) 10022 .sr(1) 10023 .m(m) 10024 .n(n) 10025 .k(k) 10026 .iterations(1) 10027 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10028 } 10029 } 10030 } 10031 } 10032 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,n_gt_8)10033 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8) { 10034 TEST_REQUIRES_ARM_NEON_DOT; 10035 for (uint32_t n = 9; n < 16; n++) { 10036 for (size_t k = 1; k 
<= 40; k += 9) { 10037 GemmMicrokernelTester() 10038 .mr(6) 10039 .nr(8) 10040 .kr(4) 10041 .sr(1) 10042 .m(6) 10043 .n(n) 10044 .k(k) 10045 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10046 } 10047 } 10048 } 10049 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,n_gt_8_strided_cn)10050 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_strided_cn) { 10051 TEST_REQUIRES_ARM_NEON_DOT; 10052 for (uint32_t n = 9; n < 16; n++) { 10053 for (size_t k = 1; k <= 40; k += 9) { 10054 GemmMicrokernelTester() 10055 .mr(6) 10056 .nr(8) 10057 .kr(4) 10058 .sr(1) 10059 .m(6) 10060 .n(n) 10061 .k(k) 10062 .cn_stride(11) 10063 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10064 } 10065 } 10066 } 10067 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,n_gt_8_subtile)10068 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_subtile) { 10069 TEST_REQUIRES_ARM_NEON_DOT; 10070 for (uint32_t n = 9; n < 16; n++) { 10071 for (size_t k = 1; k <= 40; k += 9) { 10072 for (uint32_t m = 1; m <= 6; m++) { 10073 GemmMicrokernelTester() 10074 .mr(6) 10075 .nr(8) 10076 .kr(4) 10077 .sr(1) 10078 .m(m) 10079 .n(n) 10080 .k(k) 10081 .iterations(1) 10082 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10083 } 10084 } 10085 } 10086 } 10087 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,n_div_8)10088 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8) { 10089 TEST_REQUIRES_ARM_NEON_DOT; 10090 for (uint32_t n = 16; n <= 24; n += 8) { 10091 for (size_t k = 1; k <= 40; k += 9) { 10092 GemmMicrokernelTester() 10093 .mr(6) 10094 .nr(8) 10095 .kr(4) 10096 .sr(1) 10097 .m(6) 10098 .n(n) 10099 .k(k) 10100 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10101 } 10102 } 10103 } 10104 
// 6x8c4 NEONDOT: n = 16 and 24 with cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 6x8c4 NEONDOT: n = 16 and 24 crossed with m = 1..6, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 6x8c4 NEONDOT: full tile, k = 1, 10, 19, 28, 37, ks = 3.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 6x8c4 NEONDOT: every (m, n) sub-tile up to 6x8 with ks = 3, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 6x8c4 NEONDOT: n = 9..15 with ks = 3.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 6x8c4 NEONDOT: n = 16 and 24 with ks = 3.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 6x8c4 NEONDOT: every (m, n) sub-tile up to 6x8 with cm_stride = 11, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 6x8c4 NEONDOT: full tile, ks = 3, a_offset = 251.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(k)
      .ks(3)
      .a_offset(251)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 6x8c4 NEONDOT: as a_offset, additionally sweeping zero_index over 0..5.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 6; mz++) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .ks(3)
        .a_offset(251)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 6x8c4 NEONDOT: full tile, k = 8, qmin = 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(4).sr(1)
    .m(6).n(8).k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// 6x8c4 NEONDOT: full tile, k = 8, qmax = 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(4).sr(1)
    .m(6).n(8).k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm) { 10303 TEST_REQUIRES_ARM_NEON_DOT; 10304 GemmMicrokernelTester() 10305 .mr(6) 10306 .nr(8) 10307 .kr(4) 10308 .sr(1) 10309 .m(6) 10310 .n(8) 10311 .k(8) 10312 .cm_stride(11) 10313 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10314 } 10315 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,no_a_zero_point)10316 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_a_zero_point) { 10317 TEST_REQUIRES_ARM_NEON_DOT; 10318 for (size_t k = 1; k <= 40; k += 9) { 10319 GemmMicrokernelTester() 10320 .mr(6) 10321 .nr(8) 10322 .kr(4) 10323 .sr(1) 10324 .m(6) 10325 .n(8) 10326 .k(k) 10327 .a_zero_point(0) 10328 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10329 } 10330 } 10331 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,no_b_zero_point)10332 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_b_zero_point) { 10333 TEST_REQUIRES_ARM_NEON_DOT; 10334 for (size_t k = 1; k <= 40; k += 9) { 10335 GemmMicrokernelTester() 10336 .mr(6) 10337 .nr(8) 10338 .kr(4) 10339 .sr(1) 10340 .m(6) 10341 .n(8) 10342 .k(k) 10343 .b_zero_point(0) 10344 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10345 } 10346 } 10347 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT,no_zero_point)10348 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_zero_point) { 10349 TEST_REQUIRES_ARM_NEON_DOT; 10350 for (size_t k = 1; k <= 40; k += 9) { 10351 GemmMicrokernelTester() 10352 .mr(6) 10353 .nr(8) 10354 .kr(4) 10355 .sr(1) 10356 .m(6) 10357 .n(8) 10358 .k(k) 10359 .a_zero_point(0) 10360 .b_zero_point(0) 10361 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10362 } 10363 } 10364 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 
10365 10366 10367 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_eq_8)10368 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8) { 10369 TEST_REQUIRES_ARM_NEON_DOT; 10370 GemmMicrokernelTester() 10371 .mr(8) 10372 .nr(8) 10373 .kr(4) 10374 .sr(1) 10375 .m(8) 10376 .n(8) 10377 .k(8) 10378 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10379 } 10380 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,strided_cn)10381 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cn) { 10382 TEST_REQUIRES_ARM_NEON_DOT; 10383 GemmMicrokernelTester() 10384 .mr(8) 10385 .nr(8) 10386 .kr(4) 10387 .sr(1) 10388 .m(8) 10389 .n(8) 10390 .k(8) 10391 .cn_stride(11) 10392 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10393 } 10394 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_eq_8_subtile)10395 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile) { 10396 TEST_REQUIRES_ARM_NEON_DOT; 10397 for (uint32_t n = 1; n <= 8; n++) { 10398 for (uint32_t m = 1; m <= 8; m++) { 10399 GemmMicrokernelTester() 10400 .mr(8) 10401 .nr(8) 10402 .kr(4) 10403 .sr(1) 10404 .m(m) 10405 .n(n) 10406 .k(8) 10407 .iterations(1) 10408 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10409 } 10410 } 10411 } 10412 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,k_eq_8_subtile_m)10413 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_m) { 10414 TEST_REQUIRES_ARM_NEON_DOT; 10415 for (uint32_t m = 1; m <= 8; m++) { 10416 GemmMicrokernelTester() 10417 .mr(8) 10418 .nr(8) 10419 .kr(4) 10420 .sr(1) 10421 .m(m) 10422 .n(8) 10423 .k(8) 10424 .iterations(1) 10425 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10426 } 10427 } 10428 
// 8x8c4 NEONDOT: n = 1..8 with m = 8, k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(n).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 8x8c4 NEONDOT: full tile, k = 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 8x8c4 NEONDOT: every (m, n) sub-tile up to 8x8 for k = 1..7, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 8x8c4 NEONDOT: full tile, k = 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 8x8c4 NEONDOT: every (m, n) sub-tile up to 8x8 for k = 9..15, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 8x8c4 NEONDOT: full tile, k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 8x8c4 NEONDOT: every (m, n) sub-tile up to 8x8 for k = 16, 24, ..., 80, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 8x8c4 NEONDOT: n = 9..15, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 8x8c4 NEONDOT: n = 9..15 with cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 8x8c4 NEONDOT: n = 9..15 crossed with m = 1..8, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 8x8c4 NEONDOT: n = 16 and 24, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 8x8c4 NEONDOT: n = 16 and 24 with cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 8x8c4 NEONDOT: n = 16 and 24 crossed with m = 1..8, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 8x8c4 NEONDOT: full tile, k = 1, 10, 19, 28, 37, ks = 3.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(4).sr(1)
      .m(8).n(8).k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 8x8c4 NEONDOT: every (m, n) sub-tile up to 8x8 with ks = 3, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 8x8c4 NEONDOT: n = 9..15 with ks = 3.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 8x8c4 NEONDOT: n = 16 and 24 with ks = 3.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(4).sr(1)
        .m(8).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 8x8c4 NEONDOT: every (m, n) sub-tile up to 8x8 with cm_stride = 11, one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, a_offset) { 10755 TEST_REQUIRES_ARM_NEON_DOT; 10756 for (size_t k = 1; k <= 40; k += 9) { 10757 GemmMicrokernelTester() 10758 .mr(8) 10759 .nr(8) 10760 .kr(4) 10761 .sr(1) 10762 .m(8) 10763 .n(8) 10764 .k(k) 10765 .ks(3) 10766 .a_offset(331) 10767 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10768 } 10769 } 10770 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,zero)10771 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, zero) { 10772 TEST_REQUIRES_ARM_NEON_DOT; 10773 for (size_t k = 1; k <= 40; k += 9) { 10774 for (uint32_t mz = 0; mz < 8; mz++) { 10775 GemmMicrokernelTester() 10776 .mr(8) 10777 .nr(8) 10778 .kr(4) 10779 .sr(1) 10780 .m(8) 10781 .n(8) 10782 .k(k) 10783 .ks(3) 10784 .a_offset(331) 10785 .zero_index(mz) 10786 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10787 } 10788 } 10789 } 10790 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,qmin)10791 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmin) { 10792 TEST_REQUIRES_ARM_NEON_DOT; 10793 GemmMicrokernelTester() 10794 .mr(8) 10795 .nr(8) 10796 .kr(4) 10797 .sr(1) 10798 .m(8) 10799 .n(8) 10800 .k(8) 10801 .qmin(128) 10802 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10803 } 10804 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,qmax)10805 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmax) { 10806 TEST_REQUIRES_ARM_NEON_DOT; 10807 GemmMicrokernelTester() 10808 .mr(8) 10809 .nr(8) 10810 .kr(4) 10811 .sr(1) 10812 .m(8) 10813 .n(8) 10814 .k(8) 10815 .qmax(128) 10816 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10817 } 10818 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,strided_cm)10819 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm) { 10820 
TEST_REQUIRES_ARM_NEON_DOT; 10821 GemmMicrokernelTester() 10822 .mr(8) 10823 .nr(8) 10824 .kr(4) 10825 .sr(1) 10826 .m(8) 10827 .n(8) 10828 .k(8) 10829 .cm_stride(11) 10830 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10831 } 10832 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,no_a_zero_point)10833 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_a_zero_point) { 10834 TEST_REQUIRES_ARM_NEON_DOT; 10835 for (size_t k = 1; k <= 40; k += 9) { 10836 GemmMicrokernelTester() 10837 .mr(8) 10838 .nr(8) 10839 .kr(4) 10840 .sr(1) 10841 .m(8) 10842 .n(8) 10843 .k(k) 10844 .a_zero_point(0) 10845 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10846 } 10847 } 10848 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,no_b_zero_point)10849 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_b_zero_point) { 10850 TEST_REQUIRES_ARM_NEON_DOT; 10851 for (size_t k = 1; k <= 40; k += 9) { 10852 GemmMicrokernelTester() 10853 .mr(8) 10854 .nr(8) 10855 .kr(4) 10856 .sr(1) 10857 .m(8) 10858 .n(8) 10859 .k(k) 10860 .b_zero_point(0) 10861 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10862 } 10863 } 10864 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT,no_zero_point)10865 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_zero_point) { 10866 TEST_REQUIRES_ARM_NEON_DOT; 10867 for (size_t k = 1; k <= 40; k += 9) { 10868 GemmMicrokernelTester() 10869 .mr(8) 10870 .nr(8) 10871 .kr(4) 10872 .sr(1) 10873 .m(8) 10874 .n(8) 10875 .k(k) 10876 .a_zero_point(0) 10877 .b_zero_point(0) 10878 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10879 } 10880 } 10881 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 10882 10883 10884 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && 
!XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8)10885 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8) { 10886 TEST_REQUIRES_ARM_NEON_DOT; 10887 GemmMicrokernelTester() 10888 .mr(8) 10889 .nr(16) 10890 .kr(4) 10891 .sr(1) 10892 .m(8) 10893 .n(16) 10894 .k(8) 10895 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10896 } 10897 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,strided_cn)10898 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cn) { 10899 TEST_REQUIRES_ARM_NEON_DOT; 10900 GemmMicrokernelTester() 10901 .mr(8) 10902 .nr(16) 10903 .kr(4) 10904 .sr(1) 10905 .m(8) 10906 .n(16) 10907 .k(8) 10908 .cn_stride(19) 10909 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10910 } 10911 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8_subtile)10912 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile) { 10913 TEST_REQUIRES_ARM_NEON_DOT; 10914 for (uint32_t n = 1; n <= 16; n++) { 10915 for (uint32_t m = 1; m <= 8; m++) { 10916 GemmMicrokernelTester() 10917 .mr(8) 10918 .nr(16) 10919 .kr(4) 10920 .sr(1) 10921 .m(m) 10922 .n(n) 10923 .k(8) 10924 .iterations(1) 10925 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10926 } 10927 } 10928 } 10929 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8_subtile_m)10930 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_m) { 10931 TEST_REQUIRES_ARM_NEON_DOT; 10932 for (uint32_t m = 1; m <= 8; m++) { 10933 GemmMicrokernelTester() 10934 .mr(8) 10935 .nr(16) 10936 .kr(4) 10937 .sr(1) 10938 .m(m) 10939 .n(16) 10940 .k(8) 10941 .iterations(1) 10942 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10943 } 10944 } 10945 
// 8x16c4 NEON dot-product IGEMM tests: sweeps over k and n.
// NOTE(review): TEST_REQUIRES_ARM_NEON_DOT is defined outside this file
// (presumably in xnnpack/isa-checks.h) and presumably skips a test when the CPU
// lacks the NEON dot-product extension -- confirm against that header.

// n in [1, 16] with m fixed at 8 and k = 8, one iteration per value of n.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(8)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Full 8x16 output with k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(8)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k in [1, 7] crossed with every m in [1, 8] and n in [1, 16], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full 8x16 output with k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(8)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k in [9, 15] crossed with every m in [1, 8] and n in [1, 16], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full 8x16 output with k in {16, 24, ..., 80} (multiples of 8).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(8)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k in {16, 24, ..., 80} crossed with every m in [1, 8] and n in [1, 16],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n in [17, 31] with k in {1, 10, 19, 28, 37} and m = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride(19).
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in [17, 31] and k in {1, 10, 19, 28, 37} crossed with m in [1, 8],
// one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// n in {32, 48} (multiples of nr = 16) with k in {1, 10, 19, 28, 37} and m = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,n_div_16_strided_cn)11139 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_strided_cn) { 11140 TEST_REQUIRES_ARM_NEON_DOT; 11141 for (uint32_t n = 32; n <= 48; n += 16) { 11142 for (size_t k = 1; k <= 40; k += 9) { 11143 GemmMicrokernelTester() 11144 .mr(8) 11145 .nr(16) 11146 .kr(4) 11147 .sr(1) 11148 .m(8) 11149 .n(n) 11150 .k(k) 11151 .cn_stride(19) 11152 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11153 } 11154 } 11155 } 11156 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,n_div_16_subtile)11157 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_subtile) { 11158 TEST_REQUIRES_ARM_NEON_DOT; 11159 for (uint32_t n = 32; n <= 48; n += 16) { 11160 for (size_t k = 1; k <= 40; k += 9) { 11161 for (uint32_t m = 1; m <= 8; m++) { 11162 GemmMicrokernelTester() 11163 .mr(8) 11164 .nr(16) 11165 .kr(4) 11166 .sr(1) 11167 .m(m) 11168 .n(n) 11169 .k(k) 11170 .iterations(1) 11171 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11172 } 11173 } 11174 } 11175 } 11176 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,small_kernel)11177 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, small_kernel) { 11178 TEST_REQUIRES_ARM_NEON_DOT; 11179 for (size_t k = 1; k <= 40; k += 9) { 11180 GemmMicrokernelTester() 11181 .mr(8) 11182 .nr(16) 11183 .kr(4) 11184 .sr(1) 11185 .m(8) 11186 .n(16) 11187 .k(k) 11188 .ks(3) 11189 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11190 } 11191 } 11192 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,small_kernel_subtile)11193 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, small_kernel_subtile) { 11194 TEST_REQUIRES_ARM_NEON_DOT; 11195 for (size_t k = 1; k <= 40; k += 9) { 11196 for (uint32_t n = 1; n <= 16; n++) { 11197 for (uint32_t m = 1; m <= 8; m++) { 11198 
GemmMicrokernelTester() 11199 .mr(8) 11200 .nr(16) 11201 .kr(4) 11202 .sr(1) 11203 .m(m) 11204 .n(n) 11205 .k(k) 11206 .ks(3) 11207 .iterations(1) 11208 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11209 } 11210 } 11211 } 11212 } 11213 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,n_gt_16_small_kernel)11214 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_small_kernel) { 11215 TEST_REQUIRES_ARM_NEON_DOT; 11216 for (uint32_t n = 17; n < 32; n++) { 11217 for (size_t k = 1; k <= 40; k += 9) { 11218 GemmMicrokernelTester() 11219 .mr(8) 11220 .nr(16) 11221 .kr(4) 11222 .sr(1) 11223 .m(8) 11224 .n(n) 11225 .k(k) 11226 .ks(3) 11227 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11228 } 11229 } 11230 } 11231 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,n_div_16_small_kernel)11232 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_small_kernel) { 11233 TEST_REQUIRES_ARM_NEON_DOT; 11234 for (uint32_t n = 32; n <= 48; n += 16) { 11235 for (size_t k = 1; k <= 40; k += 9) { 11236 GemmMicrokernelTester() 11237 .mr(8) 11238 .nr(16) 11239 .kr(4) 11240 .sr(1) 11241 .m(8) 11242 .n(n) 11243 .k(k) 11244 .ks(3) 11245 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11246 } 11247 } 11248 } 11249 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,strided_cm_subtile)11250 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm_subtile) { 11251 TEST_REQUIRES_ARM_NEON_DOT; 11252 for (size_t k = 1; k <= 40; k += 9) { 11253 for (uint32_t n = 1; n <= 16; n++) { 11254 for (uint32_t m = 1; m <= 8; m++) { 11255 GemmMicrokernelTester() 11256 .mr(8) 11257 .nr(16) 11258 .kr(4) 11259 .sr(1) 11260 .m(m) 11261 .n(n) 11262 .k(k) 11263 .cm_stride(19) 11264 .iterations(1) 11265 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11266 } 11267 } 11268 } 11269 } 11270 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,a_offset)11271 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, a_offset) { 11272 TEST_REQUIRES_ARM_NEON_DOT; 11273 for (size_t k = 1; k <= 40; k += 9) { 11274 GemmMicrokernelTester() 11275 .mr(8) 11276 .nr(16) 11277 .kr(4) 11278 .sr(1) 11279 .m(8) 11280 .n(16) 11281 .k(k) 11282 .ks(3) 11283 .a_offset(331) 11284 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11285 } 11286 } 11287 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,zero)11288 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, zero) { 11289 TEST_REQUIRES_ARM_NEON_DOT; 11290 for (size_t k = 1; k <= 40; k += 9) { 11291 for (uint32_t mz = 0; mz < 8; mz++) { 11292 GemmMicrokernelTester() 11293 .mr(8) 11294 .nr(16) 11295 .kr(4) 11296 .sr(1) 11297 .m(8) 11298 .n(16) 11299 .k(k) 11300 .ks(3) 11301 .a_offset(331) 11302 .zero_index(mz) 11303 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11304 } 11305 } 11306 } 11307 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,qmin)11308 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmin) { 11309 TEST_REQUIRES_ARM_NEON_DOT; 11310 GemmMicrokernelTester() 11311 .mr(8) 11312 .nr(16) 11313 .kr(4) 11314 .sr(1) 11315 .m(8) 11316 .n(16) 11317 .k(8) 11318 .qmin(128) 11319 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11320 } 11321 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,qmax)11322 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmax) { 11323 TEST_REQUIRES_ARM_NEON_DOT; 11324 GemmMicrokernelTester() 11325 .mr(8) 11326 .nr(16) 11327 .kr(4) 11328 .sr(1) 11329 .m(8) 11330 .n(16) 11331 .k(8) 11332 .qmax(128) 11333 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 11334 } 11335 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,strided_cm)11336 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm) { 11337 TEST_REQUIRES_ARM_NEON_DOT; 11338 GemmMicrokernelTester() 11339 .mr(8) 11340 .nr(16) 11341 .kr(4) 11342 .sr(1) 11343 .m(8) 11344 .n(16) 11345 .k(8) 11346 .cm_stride(19) 11347 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11348 } 11349 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,no_a_zero_point)11350 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_a_zero_point) { 11351 TEST_REQUIRES_ARM_NEON_DOT; 11352 for (size_t k = 1; k <= 40; k += 9) { 11353 GemmMicrokernelTester() 11354 .mr(8) 11355 .nr(16) 11356 .kr(4) 11357 .sr(1) 11358 .m(8) 11359 .n(16) 11360 .k(k) 11361 .a_zero_point(0) 11362 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11363 } 11364 } 11365 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,no_b_zero_point)11366 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_b_zero_point) { 11367 TEST_REQUIRES_ARM_NEON_DOT; 11368 for (size_t k = 1; k <= 40; k += 9) { 11369 GemmMicrokernelTester() 11370 .mr(8) 11371 .nr(16) 11372 .kr(4) 11373 .sr(1) 11374 .m(8) 11375 .n(16) 11376 .k(k) 11377 .b_zero_point(0) 11378 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 11379 } 11380 } 11381 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,no_zero_point)11382 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_zero_point) { 11383 TEST_REQUIRES_ARM_NEON_DOT; 11384 for (size_t k = 1; k <= 40; k += 9) { 11385 GemmMicrokernelTester() 11386 .mr(8) 11387 .nr(16) 11388 .kr(4) 11389 .sr(1) 11390 .m(8) 11391 .n(16) 11392 .k(k) 11393 .a_zero_point(0) 11394 .b_zero_point(0) 11395 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 11396 } 11397 } 11398 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 11399