1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/qu8-igemm-minmax-rndnu.yaml 11 // Generator: tools/generate-gemm-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/allocator.h> 17 #include <xnnpack/common.h> 18 #include <xnnpack/isa-checks.h> 19 20 #include <xnnpack/gemm.h> 21 #include <xnnpack/igemm.h> 22 #include <xnnpack/ppmm.h> 23 #include "gemm-microkernel-tester.h" 24 25 26 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8)27 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) { 28 TEST_REQUIRES_ARM_NEON; 29 GemmMicrokernelTester() 30 .mr(4) 31 .nr(8) 32 .kr(1) 33 .sr(1) 34 .m(4) 35 .n(8) 36 .k(8) 37 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 38 } 39 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cn)40 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) { 41 TEST_REQUIRES_ARM_NEON; 42 GemmMicrokernelTester() 43 .mr(4) 44 .nr(8) 45 .kr(1) 46 .sr(1) 47 .m(4) 48 .n(8) 49 .k(8) 50 .cn_stride(11) 51 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 52 } 53 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile)54 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) { 55 TEST_REQUIRES_ARM_NEON; 56 for (uint32_t n = 1; n <= 8; n++) { 57 for (uint32_t m = 1; m <= 4; m++) { 58 GemmMicrokernelTester() 59 .mr(4) 60 .nr(8) 61 .kr(1) 62 .sr(1) 63 .m(m) 64 .n(n) 65 .k(8) 66 
.iterations(1) 67 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 68 } 69 } 70 } 71 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile_m)72 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) { 73 TEST_REQUIRES_ARM_NEON; 74 for (uint32_t m = 1; m <= 4; m++) { 75 GemmMicrokernelTester() 76 .mr(4) 77 .nr(8) 78 .kr(1) 79 .sr(1) 80 .m(m) 81 .n(8) 82 .k(8) 83 .iterations(1) 84 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 85 } 86 } 87 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile_n)88 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) { 89 TEST_REQUIRES_ARM_NEON; 90 for (uint32_t n = 1; n <= 8; n++) { 91 GemmMicrokernelTester() 92 .mr(4) 93 .nr(8) 94 .kr(1) 95 .sr(1) 96 .m(4) 97 .n(n) 98 .k(8) 99 .iterations(1) 100 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 101 } 102 } 103 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_lt_8)104 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) { 105 TEST_REQUIRES_ARM_NEON; 106 for (size_t k = 1; k < 8; k++) { 107 GemmMicrokernelTester() 108 .mr(4) 109 .nr(8) 110 .kr(1) 111 .sr(1) 112 .m(4) 113 .n(8) 114 .k(k) 115 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 116 } 117 } 118 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_lt_8_subtile)119 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) { 120 TEST_REQUIRES_ARM_NEON; 121 for (size_t k = 1; k < 8; k++) { 122 for (uint32_t n = 1; n <= 8; n++) { 123 
for (uint32_t m = 1; m <= 4; m++) { 124 GemmMicrokernelTester() 125 .mr(4) 126 .nr(8) 127 .kr(1) 128 .sr(1) 129 .m(m) 130 .n(n) 131 .k(k) 132 .iterations(1) 133 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 134 } 135 } 136 } 137 } 138 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_gt_8)139 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) { 140 TEST_REQUIRES_ARM_NEON; 141 for (size_t k = 9; k < 16; k++) { 142 GemmMicrokernelTester() 143 .mr(4) 144 .nr(8) 145 .kr(1) 146 .sr(1) 147 .m(4) 148 .n(8) 149 .k(k) 150 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 151 } 152 } 153 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_gt_8_subtile)154 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) { 155 TEST_REQUIRES_ARM_NEON; 156 for (size_t k = 9; k < 16; k++) { 157 for (uint32_t n = 1; n <= 8; n++) { 158 for (uint32_t m = 1; m <= 4; m++) { 159 GemmMicrokernelTester() 160 .mr(4) 161 .nr(8) 162 .kr(1) 163 .sr(1) 164 .m(m) 165 .n(n) 166 .k(k) 167 .iterations(1) 168 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 169 } 170 } 171 } 172 } 173 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_div_8)174 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) { 175 TEST_REQUIRES_ARM_NEON; 176 for (size_t k = 16; k <= 80; k += 8) { 177 GemmMicrokernelTester() 178 .mr(4) 179 .nr(8) 180 .kr(1) 181 .sr(1) 182 .m(4) 183 .n(8) 184 .k(k) 185 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 186 } 187 } 188 
// k = 16, 24, ..., 80 crossed with every (m, n), m in [1, 4], n in [1, 8].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
                xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m=4 for every n in [9, 15] and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same n and k ranges as n_gt_8, with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in [9, 15], k in {1, 10, 19, 28, 37}, every m in [1, 4]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
                xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m=4 for n in {16, 24} and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same n and k ranges as n_div_8, with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k in {1, 10, 19, 28, 37}, every m in [1, 4]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
                xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m=4, n=8, ks(3) for k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks(3), k in {1, 10, 19, 28, 37}, every (m, n) with m in [1, 4], n in [1, 8].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
                xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m=4, ks(3) for every n in [9, 15] and k in {1, 10, 19, 28, 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8_small_kernel)374 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) { 375 TEST_REQUIRES_ARM_NEON; 376 for (uint32_t n = 16; n <= 24; n += 8) { 377 for (size_t k = 1; k <= 40; k += 9) { 378 GemmMicrokernelTester() 379 .mr(4) 380 .nr(8) 381 .kr(1) 382 .sr(1) 383 .m(4) 384 .n(n) 385 .k(k) 386 .ks(3) 387 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 388 } 389 } 390 } 391 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cm_subtile)392 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) { 393 TEST_REQUIRES_ARM_NEON; 394 for (size_t k = 1; k <= 40; k += 9) { 395 for (uint32_t n = 1; n <= 8; n++) { 396 for (uint32_t m = 1; m <= 4; m++) { 397 GemmMicrokernelTester() 398 .mr(4) 399 .nr(8) 400 .kr(1) 401 .sr(1) 402 .m(m) 403 .n(n) 404 .k(k) 405 .cm_stride(11) 406 .iterations(1) 407 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 408 } 409 } 410 } 411 } 412 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,a_offset)413 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, a_offset) { 414 TEST_REQUIRES_ARM_NEON; 415 for (size_t k = 1; k <= 40; k += 9) { 416 GemmMicrokernelTester() 417 .mr(4) 418 .nr(8) 419 .kr(1) 420 .sr(1) 421 .m(4) 422 .n(8) 423 .k(k) 424 .ks(3) 425 .a_offset(163) 426 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 427 } 428 } 429 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,zero)430 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, zero) { 431 TEST_REQUIRES_ARM_NEON; 432 for (size_t k = 1; k <= 40; k += 9) { 433 for 
(uint32_t mz = 0; mz < 4; mz++) { 434 GemmMicrokernelTester() 435 .mr(4) 436 .nr(8) 437 .kr(1) 438 .sr(1) 439 .m(4) 440 .n(8) 441 .k(k) 442 .ks(3) 443 .a_offset(163) 444 .zero_index(mz) 445 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 446 } 447 } 448 } 449 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,qmin)450 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) { 451 TEST_REQUIRES_ARM_NEON; 452 GemmMicrokernelTester() 453 .mr(4) 454 .nr(8) 455 .kr(1) 456 .sr(1) 457 .m(4) 458 .n(8) 459 .k(8) 460 .qmin(128) 461 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 462 } 463 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,qmax)464 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) { 465 TEST_REQUIRES_ARM_NEON; 466 GemmMicrokernelTester() 467 .mr(4) 468 .nr(8) 469 .kr(1) 470 .sr(1) 471 .m(4) 472 .n(8) 473 .k(8) 474 .qmax(128) 475 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 476 } 477 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cm)478 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) { 479 TEST_REQUIRES_ARM_NEON; 480 GemmMicrokernelTester() 481 .mr(4) 482 .nr(8) 483 .kr(1) 484 .sr(1) 485 .m(4) 486 .n(8) 487 .k(8) 488 .cm_stride(11) 489 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 490 } 491 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,no_a_zero_point)492 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_a_zero_point) { 493 TEST_REQUIRES_ARM_NEON; 494 for (size_t k = 1; k <= 40; k += 9) 
{ 495 GemmMicrokernelTester() 496 .mr(4) 497 .nr(8) 498 .kr(1) 499 .sr(1) 500 .m(4) 501 .n(8) 502 .k(k) 503 .a_zero_point(0) 504 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 505 } 506 } 507 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,no_b_zero_point)508 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_b_zero_point) { 509 TEST_REQUIRES_ARM_NEON; 510 for (size_t k = 1; k <= 40; k += 9) { 511 GemmMicrokernelTester() 512 .mr(4) 513 .nr(8) 514 .kr(1) 515 .sr(1) 516 .m(4) 517 .n(8) 518 .k(k) 519 .b_zero_point(0) 520 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 521 } 522 } 523 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,no_zero_point)524 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_zero_point) { 525 TEST_REQUIRES_ARM_NEON; 526 for (size_t k = 1; k <= 40; k += 9) { 527 GemmMicrokernelTester() 528 .mr(4) 529 .nr(8) 530 .kr(1) 531 .sr(1) 532 .m(4) 533 .n(8) 534 .k(k) 535 .a_zero_point(0) 536 .b_zero_point(0) 537 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 538 } 539 } 540 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY 541 542 543 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,k_eq_8)544 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) { 545 TEST_REQUIRES_ARM_NEON; 546 GemmMicrokernelTester() 547 .mr(4) 548 .nr(8) 549 .kr(1) 550 .sr(1) 551 .m(4) 552 .n(8) 553 .k(8) 554 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 555 } 556 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,strided_cn)557 
// m=4, n=8, k=8 with cn_stride(11).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// k=8 for every (m, n) with m in [1, 4] and n in [1, 8]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
              xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// k=8, n=8 for every m in [1, 4]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k=8, m=4 for every n in [1, 8]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,k_lt_8)621 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) { 622 TEST_REQUIRES_ARM_NEON; 623 for (size_t k = 1; k < 8; k++) { 624 GemmMicrokernelTester() 625 .mr(4) 626 .nr(8) 627 .kr(1) 628 .sr(1) 629 .m(4) 630 .n(8) 631 .k(k) 632 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 633 } 634 } 635 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,k_lt_8_subtile)636 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) { 637 TEST_REQUIRES_ARM_NEON; 638 for (size_t k = 1; k < 8; k++) { 639 for (uint32_t n = 1; n <= 8; n++) { 640 for (uint32_t m = 1; m <= 4; m++) { 641 GemmMicrokernelTester() 642 .mr(4) 643 .nr(8) 644 .kr(1) 645 .sr(1) 646 .m(m) 647 .n(n) 648 .k(k) 649 .iterations(1) 650 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 651 } 652 } 653 } 654 } 655 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,k_gt_8)656 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) { 657 TEST_REQUIRES_ARM_NEON; 658 for (size_t k = 9; k < 16; k++) { 659 GemmMicrokernelTester() 660 .mr(4) 661 .nr(8) 662 .kr(1) 663 .sr(1) 664 .m(4) 665 .n(8) 666 .k(k) 667 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 668 } 669 } 670 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,k_gt_8_subtile)671 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) { 672 TEST_REQUIRES_ARM_NEON; 673 for (size_t k = 9; k < 16; k++) { 674 for (uint32_t n = 1; n <= 8; n++) { 675 for (uint32_t m = 1; m <= 4; m++) { 676 GemmMicrokernelTester() 677 .mr(4) 678 .nr(8) 679 .kr(1) 680 .sr(1) 681 .m(m) 682 .n(n) 683 .k(k) 684 .iterations(1) 685 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 686 } 687 } 688 } 689 } 690 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,k_div_8)691 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) { 692 TEST_REQUIRES_ARM_NEON; 693 for (size_t k = 16; k <= 80; k += 8) { 694 GemmMicrokernelTester() 695 .mr(4) 696 .nr(8) 697 .kr(1) 698 .sr(1) 699 .m(4) 700 .n(8) 701 .k(k) 702 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 703 } 704 } 705 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,k_div_8_subtile)706 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) { 707 TEST_REQUIRES_ARM_NEON; 708 for (size_t k = 16; k <= 80; k += 8) { 709 for (uint32_t n = 1; n <= 8; n++) { 710 for (uint32_t m = 1; m <= 4; m++) { 711 GemmMicrokernelTester() 712 .mr(4) 713 .nr(8) 714 .kr(1) 715 .sr(1) 716 .m(m) 717 .n(n) 718 .k(k) 719 .iterations(1) 720 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 721 } 722 } 723 } 724 } 725 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,n_gt_8)726 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) { 727 TEST_REQUIRES_ARM_NEON; 728 for (uint32_t n = 9; n < 16; n++) { 729 for (size_t k = 1; k <= 40; k += 9) { 730 GemmMicrokernelTester() 731 .mr(4) 732 .nr(8) 733 .kr(1) 734 .sr(1) 735 .m(4) 736 .n(n) 737 .k(k) 738 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 739 } 740 } 741 } 742 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,n_gt_8_strided_cn)743 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) { 744 TEST_REQUIRES_ARM_NEON; 745 
for (uint32_t n = 9; n < 16; n++) { 746 for (size_t k = 1; k <= 40; k += 9) { 747 GemmMicrokernelTester() 748 .mr(4) 749 .nr(8) 750 .kr(1) 751 .sr(1) 752 .m(4) 753 .n(n) 754 .k(k) 755 .cn_stride(11) 756 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 757 } 758 } 759 } 760 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,n_gt_8_subtile)761 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) { 762 TEST_REQUIRES_ARM_NEON; 763 for (uint32_t n = 9; n < 16; n++) { 764 for (size_t k = 1; k <= 40; k += 9) { 765 for (uint32_t m = 1; m <= 4; m++) { 766 GemmMicrokernelTester() 767 .mr(4) 768 .nr(8) 769 .kr(1) 770 .sr(1) 771 .m(m) 772 .n(n) 773 .k(k) 774 .iterations(1) 775 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 776 } 777 } 778 } 779 } 780 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,n_div_8)781 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) { 782 TEST_REQUIRES_ARM_NEON; 783 for (uint32_t n = 16; n <= 24; n += 8) { 784 for (size_t k = 1; k <= 40; k += 9) { 785 GemmMicrokernelTester() 786 .mr(4) 787 .nr(8) 788 .kr(1) 789 .sr(1) 790 .m(4) 791 .n(n) 792 .k(k) 793 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 794 } 795 } 796 } 797 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,n_div_8_strided_cn)798 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) { 799 TEST_REQUIRES_ARM_NEON; 800 for (uint32_t n = 16; n <= 24; n += 8) { 801 for (size_t k = 1; k <= 40; k += 9) { 802 GemmMicrokernelTester() 803 .mr(4) 804 .nr(8) 805 .kr(1) 806 .sr(1) 807 .m(4) 808 .n(n) 809 .k(k) 810 .cn_stride(11) 811 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 812 } 813 } 814 } 815 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,n_div_8_subtile)816 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) { 817 TEST_REQUIRES_ARM_NEON; 818 for (uint32_t n = 16; n <= 24; n += 8) { 819 for (size_t k = 1; k <= 40; k += 9) { 820 for (uint32_t m = 1; m <= 4; m++) { 821 GemmMicrokernelTester() 822 .mr(4) 823 .nr(8) 824 .kr(1) 825 .sr(1) 826 .m(m) 827 .n(n) 828 .k(k) 829 .iterations(1) 830 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 831 } 832 } 833 } 834 } 835 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,small_kernel)836 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel) { 837 TEST_REQUIRES_ARM_NEON; 838 for (size_t k = 1; k <= 40; k += 9) { 839 GemmMicrokernelTester() 840 .mr(4) 841 .nr(8) 842 .kr(1) 843 .sr(1) 844 .m(4) 845 .n(8) 846 .k(k) 847 .ks(3) 848 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 849 } 850 } 851 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,small_kernel_subtile)852 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel_subtile) { 853 TEST_REQUIRES_ARM_NEON; 854 for (size_t k = 1; k <= 40; k += 9) { 855 for (uint32_t n = 1; n <= 8; n++) { 856 for (uint32_t m = 1; m <= 4; m++) { 857 GemmMicrokernelTester() 858 .mr(4) 859 .nr(8) 860 .kr(1) 861 .sr(1) 862 .m(m) 863 .n(n) 864 .k(k) 865 .ks(3) 866 .iterations(1) 867 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 868 } 869 } 870 } 871 } 872 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,n_gt_8_small_kernel)873 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, 
n_gt_8_small_kernel) { 874 TEST_REQUIRES_ARM_NEON; 875 for (uint32_t n = 9; n < 16; n++) { 876 for (size_t k = 1; k <= 40; k += 9) { 877 GemmMicrokernelTester() 878 .mr(4) 879 .nr(8) 880 .kr(1) 881 .sr(1) 882 .m(4) 883 .n(n) 884 .k(k) 885 .ks(3) 886 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 887 } 888 } 889 } 890 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,n_div_8_small_kernel)891 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_small_kernel) { 892 TEST_REQUIRES_ARM_NEON; 893 for (uint32_t n = 16; n <= 24; n += 8) { 894 for (size_t k = 1; k <= 40; k += 9) { 895 GemmMicrokernelTester() 896 .mr(4) 897 .nr(8) 898 .kr(1) 899 .sr(1) 900 .m(4) 901 .n(n) 902 .k(k) 903 .ks(3) 904 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 905 } 906 } 907 } 908 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,strided_cm_subtile)909 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) { 910 TEST_REQUIRES_ARM_NEON; 911 for (size_t k = 1; k <= 40; k += 9) { 912 for (uint32_t n = 1; n <= 8; n++) { 913 for (uint32_t m = 1; m <= 4; m++) { 914 GemmMicrokernelTester() 915 .mr(4) 916 .nr(8) 917 .kr(1) 918 .sr(1) 919 .m(m) 920 .n(n) 921 .k(k) 922 .cm_stride(11) 923 .iterations(1) 924 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 925 } 926 } 927 } 928 } 929 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,a_offset)930 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, a_offset) { 931 TEST_REQUIRES_ARM_NEON; 932 for (size_t k = 1; k <= 40; k += 9) { 933 GemmMicrokernelTester() 934 .mr(4) 935 .nr(8) 936 .kr(1) 937 .sr(1) 938 .m(4) 939 .n(8) 940 .k(k) 941 .ks(3) 942 .a_offset(163) 943 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 944 } 945 } 946 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,zero)947 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, zero) { 948 TEST_REQUIRES_ARM_NEON; 949 for (size_t k = 1; k <= 40; k += 9) { 950 for (uint32_t mz = 0; mz < 4; mz++) { 951 GemmMicrokernelTester() 952 .mr(4) 953 .nr(8) 954 .kr(1) 955 .sr(1) 956 .m(4) 957 .n(8) 958 .k(k) 959 .ks(3) 960 .a_offset(163) 961 .zero_index(mz) 962 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 963 } 964 } 965 } 966 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,qmin)967 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) { 968 TEST_REQUIRES_ARM_NEON; 969 GemmMicrokernelTester() 970 .mr(4) 971 .nr(8) 972 .kr(1) 973 .sr(1) 974 .m(4) 975 .n(8) 976 .k(8) 977 .qmin(128) 978 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 979 } 980 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,qmax)981 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) { 982 TEST_REQUIRES_ARM_NEON; 983 GemmMicrokernelTester() 984 .mr(4) 985 .nr(8) 986 .kr(1) 987 .sr(1) 988 .m(4) 989 .n(8) 990 .k(8) 991 .qmax(128) 992 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 993 } 994 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,strided_cm)995 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) { 996 TEST_REQUIRES_ARM_NEON; 997 GemmMicrokernelTester() 998 .mr(4) 999 .nr(8) 1000 .kr(1) 1001 .sr(1) 1002 .m(4) 1003 .n(8) 1004 .k(8) 1005 .cm_stride(11) 1006 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1007 } 1008 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,no_a_zero_point)1009 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_a_zero_point) { 1010 TEST_REQUIRES_ARM_NEON; 1011 for (size_t k = 1; k <= 40; k += 9) { 1012 GemmMicrokernelTester() 1013 .mr(4) 1014 .nr(8) 1015 .kr(1) 1016 .sr(1) 1017 .m(4) 1018 .n(8) 1019 .k(k) 1020 .a_zero_point(0) 1021 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1022 } 1023 } 1024 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,no_b_zero_point)1025 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_b_zero_point) { 1026 TEST_REQUIRES_ARM_NEON; 1027 for (size_t k = 1; k <= 40; k += 9) { 1028 GemmMicrokernelTester() 1029 .mr(4) 1030 .nr(8) 1031 .kr(1) 1032 .sr(1) 1033 .m(4) 1034 .n(8) 1035 .k(k) 1036 .b_zero_point(0) 1037 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1038 } 1039 } 1040 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,no_zero_point)1041 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_zero_point) { 1042 TEST_REQUIRES_ARM_NEON; 1043 for (size_t k = 1; k <= 40; k += 9) { 1044 GemmMicrokernelTester() 1045 .mr(4) 1046 .nr(8) 1047 .kr(1) 1048 .sr(1) 1049 .m(4) 1050 .n(8) 1051 .k(k) 1052 .a_zero_point(0) 1053 .b_zero_point(0) 1054 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1055 } 1056 } 1057 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY 1058 1059 1060 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,k_eq_8)1061 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8) 
{ 1062 TEST_REQUIRES_ARM_NEON; 1063 GemmMicrokernelTester() 1064 .mr(2) 1065 .nr(8) 1066 .kr(1) 1067 .sr(1) 1068 .m(2) 1069 .n(8) 1070 .k(8) 1071 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1072 } 1073 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,strided_cn)1074 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, strided_cn) { 1075 TEST_REQUIRES_ARM_NEON; 1076 GemmMicrokernelTester() 1077 .mr(2) 1078 .nr(8) 1079 .kr(1) 1080 .sr(1) 1081 .m(2) 1082 .n(8) 1083 .k(8) 1084 .cn_stride(11) 1085 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1086 } 1087 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,k_eq_8_subtile)1088 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8_subtile) { 1089 TEST_REQUIRES_ARM_NEON; 1090 for (uint32_t n = 1; n <= 8; n++) { 1091 for (uint32_t m = 1; m <= 2; m++) { 1092 GemmMicrokernelTester() 1093 .mr(2) 1094 .nr(8) 1095 .kr(1) 1096 .sr(1) 1097 .m(m) 1098 .n(n) 1099 .k(8) 1100 .iterations(1) 1101 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1102 } 1103 } 1104 } 1105 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,k_eq_8_subtile_m)1106 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8_subtile_m) { 1107 TEST_REQUIRES_ARM_NEON; 1108 for (uint32_t m = 1; m <= 2; m++) { 1109 GemmMicrokernelTester() 1110 .mr(2) 1111 .nr(8) 1112 .kr(1) 1113 .sr(1) 1114 .m(m) 1115 .n(8) 1116 .k(8) 1117 .iterations(1) 1118 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1119 } 1120 } 1121 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,k_eq_8_subtile_n)1122 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8_subtile_n) { 1123 TEST_REQUIRES_ARM_NEON; 1124 for (uint32_t n = 1; n <= 8; n++) { 1125 
GemmMicrokernelTester() 1126 .mr(2) 1127 .nr(8) 1128 .kr(1) 1129 .sr(1) 1130 .m(2) 1131 .n(n) 1132 .k(8) 1133 .iterations(1) 1134 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1135 } 1136 } 1137 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,k_lt_8)1138 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_lt_8) { 1139 TEST_REQUIRES_ARM_NEON; 1140 for (size_t k = 1; k < 8; k++) { 1141 GemmMicrokernelTester() 1142 .mr(2) 1143 .nr(8) 1144 .kr(1) 1145 .sr(1) 1146 .m(2) 1147 .n(8) 1148 .k(k) 1149 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1150 } 1151 } 1152 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,k_lt_8_subtile)1153 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_lt_8_subtile) { 1154 TEST_REQUIRES_ARM_NEON; 1155 for (size_t k = 1; k < 8; k++) { 1156 for (uint32_t n = 1; n <= 8; n++) { 1157 for (uint32_t m = 1; m <= 2; m++) { 1158 GemmMicrokernelTester() 1159 .mr(2) 1160 .nr(8) 1161 .kr(1) 1162 .sr(1) 1163 .m(m) 1164 .n(n) 1165 .k(k) 1166 .iterations(1) 1167 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1168 } 1169 } 1170 } 1171 } 1172 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,k_gt_8)1173 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_gt_8) { 1174 TEST_REQUIRES_ARM_NEON; 1175 for (size_t k = 9; k < 16; k++) { 1176 GemmMicrokernelTester() 1177 .mr(2) 1178 .nr(8) 1179 .kr(1) 1180 .sr(1) 1181 .m(2) 1182 .n(8) 1183 .k(k) 1184 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1185 } 1186 } 1187 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,k_gt_8_subtile)1188 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_gt_8_subtile) { 1189 TEST_REQUIRES_ARM_NEON; 1190 for (size_t k = 9; k < 16; k++) { 1191 for (uint32_t n = 
1; n <= 8; n++) { 1192 for (uint32_t m = 1; m <= 2; m++) { 1193 GemmMicrokernelTester() 1194 .mr(2) 1195 .nr(8) 1196 .kr(1) 1197 .sr(1) 1198 .m(m) 1199 .n(n) 1200 .k(k) 1201 .iterations(1) 1202 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1203 } 1204 } 1205 } 1206 } 1207 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,k_div_8)1208 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_div_8) { 1209 TEST_REQUIRES_ARM_NEON; 1210 for (size_t k = 16; k <= 80; k += 8) { 1211 GemmMicrokernelTester() 1212 .mr(2) 1213 .nr(8) 1214 .kr(1) 1215 .sr(1) 1216 .m(2) 1217 .n(8) 1218 .k(k) 1219 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1220 } 1221 } 1222 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,k_div_8_subtile)1223 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_div_8_subtile) { 1224 TEST_REQUIRES_ARM_NEON; 1225 for (size_t k = 16; k <= 80; k += 8) { 1226 for (uint32_t n = 1; n <= 8; n++) { 1227 for (uint32_t m = 1; m <= 2; m++) { 1228 GemmMicrokernelTester() 1229 .mr(2) 1230 .nr(8) 1231 .kr(1) 1232 .sr(1) 1233 .m(m) 1234 .n(n) 1235 .k(k) 1236 .iterations(1) 1237 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1238 } 1239 } 1240 } 1241 } 1242 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,n_gt_8)1243 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8) { 1244 TEST_REQUIRES_ARM_NEON; 1245 for (uint32_t n = 9; n < 16; n++) { 1246 for (size_t k = 1; k <= 40; k += 9) { 1247 GemmMicrokernelTester() 1248 .mr(2) 1249 .nr(8) 1250 .kr(1) 1251 .sr(1) 1252 .m(2) 1253 .n(n) 1254 .k(k) 1255 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1256 } 1257 } 1258 } 1259 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,n_gt_8_strided_cn)1260 
// m = 2, each n in [9, 15], k = 1, 10, 19, 28, 37, with cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Each n in [9, 15], k = 1, 10, 19, 28, 37, every m in [1, 2].
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 2, n = 16 and 24, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m = 2, n = 16 and 24, k = 1, 10, 19, 28, 37, with cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 10, 19, 28, 37, every m in [1, 2].
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile with ks = 3, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks = 3 for every m in [1, 2], n in [1, 8], k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks = 3, m = 2, each n in [9, 15], k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// ks = 3, m = 2, n = 16 and 24, k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// cm_stride = 11 for every m in [1, 2], n in [1, 8], k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile with ks = 3 and a_offset = 83, for k = 1, 10, 19, 28, 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// As a_offset, additionally setting zero_index to each mz in [0, 2).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Full 2x8 tile, k = 8, with qmin = 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// Full 2x8 tile, k = 8, with qmax = 128.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// Full 2x8 tile, k = 8, with cm_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, no_a_zero_point) { 1527 TEST_REQUIRES_ARM_NEON; 1528 for (size_t k = 1; k <= 40; k += 9) { 1529 GemmMicrokernelTester() 1530 .mr(2) 1531 .nr(8) 1532 .kr(1) 1533 .sr(1) 1534 .m(2) 1535 .n(8) 1536 .k(k) 1537 .a_zero_point(0) 1538 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1539 } 1540 } 1541 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,no_b_zero_point)1542 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, no_b_zero_point) { 1543 TEST_REQUIRES_ARM_NEON; 1544 for (size_t k = 1; k <= 40; k += 9) { 1545 GemmMicrokernelTester() 1546 .mr(2) 1547 .nr(8) 1548 .kr(1) 1549 .sr(1) 1550 .m(2) 1551 .n(8) 1552 .k(k) 1553 .b_zero_point(0) 1554 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1555 } 1556 } 1557 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE,no_zero_point)1558 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, no_zero_point) { 1559 TEST_REQUIRES_ARM_NEON; 1560 for (size_t k = 1; k <= 40; k += 9) { 1561 GemmMicrokernelTester() 1562 .mr(2) 1563 .nr(8) 1564 .kr(1) 1565 .sr(1) 1566 .m(2) 1567 .n(8) 1568 .k(k) 1569 .a_zero_point(0) 1570 .b_zero_point(0) 1571 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1572 } 1573 } 1574 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1575 1576 1577 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,k_eq_8)1578 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8) { 1579 TEST_REQUIRES_ARM_NEON; 1580 GemmMicrokernelTester() 1581 .mr(3) 1582 .nr(8) 1583 .kr(1) 1584 .sr(1) 1585 .m(3) 1586 .n(8) 1587 .k(8) 1588 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1589 } 1590 
// Full 3x8 tile with k = 8 and cn_stride = 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// k = 8 for every m in [1, 3] and n in [1, 8]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// k = 8, n = 8, every m in [1, 3]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 8, m = 3, every n in [1, 8]; iterations(1) per case.
TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,k_lt_8)1655 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_lt_8) { 1656 TEST_REQUIRES_ARM_NEON; 1657 for (size_t k = 1; k < 8; k++) { 1658 GemmMicrokernelTester() 1659 .mr(3) 1660 .nr(8) 1661 .kr(1) 1662 .sr(1) 1663 .m(3) 1664 .n(8) 1665 .k(k) 1666 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1667 } 1668 } 1669 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,k_lt_8_subtile)1670 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_lt_8_subtile) { 1671 TEST_REQUIRES_ARM_NEON; 1672 for (size_t k = 1; k < 8; k++) { 1673 for (uint32_t n = 1; n <= 8; n++) { 1674 for (uint32_t m = 1; m <= 3; m++) { 1675 GemmMicrokernelTester() 1676 .mr(3) 1677 .nr(8) 1678 .kr(1) 1679 .sr(1) 1680 .m(m) 1681 .n(n) 1682 .k(k) 1683 .iterations(1) 1684 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1685 } 1686 } 1687 } 1688 } 1689 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,k_gt_8)1690 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_gt_8) { 1691 TEST_REQUIRES_ARM_NEON; 1692 for (size_t k = 9; k < 16; k++) { 1693 GemmMicrokernelTester() 1694 .mr(3) 1695 .nr(8) 1696 .kr(1) 1697 .sr(1) 1698 .m(3) 1699 .n(8) 1700 .k(k) 1701 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1702 } 1703 } 1704 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,k_gt_8_subtile)1705 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_gt_8_subtile) { 1706 TEST_REQUIRES_ARM_NEON; 1707 for (size_t k = 9; k < 16; k++) { 1708 for (uint32_t n = 1; n <= 8; n++) { 1709 for (uint32_t m = 1; m <= 3; m++) { 1710 GemmMicrokernelTester() 1711 .mr(3) 1712 .nr(8) 1713 .kr(1) 1714 .sr(1) 1715 .m(m) 1716 .n(n) 1717 .k(k) 1718 .iterations(1) 1719 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1720 } 1721 } 1722 } 1723 } 1724 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,k_div_8)1725 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_div_8) { 1726 TEST_REQUIRES_ARM_NEON; 1727 for (size_t k = 16; k <= 80; k += 8) { 1728 GemmMicrokernelTester() 1729 .mr(3) 1730 .nr(8) 1731 .kr(1) 1732 .sr(1) 1733 .m(3) 1734 .n(8) 1735 .k(k) 1736 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1737 } 1738 } 1739 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,k_div_8_subtile)1740 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_div_8_subtile) { 1741 TEST_REQUIRES_ARM_NEON; 1742 for (size_t k = 16; k <= 80; k += 8) { 1743 for (uint32_t n = 1; n <= 8; n++) { 1744 for (uint32_t m = 1; m <= 3; m++) { 1745 GemmMicrokernelTester() 1746 .mr(3) 1747 .nr(8) 1748 .kr(1) 1749 .sr(1) 1750 .m(m) 1751 .n(n) 1752 .k(k) 1753 .iterations(1) 1754 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1755 } 1756 } 1757 } 1758 } 1759 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,n_gt_8)1760 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8) { 1761 TEST_REQUIRES_ARM_NEON; 1762 for (uint32_t n = 9; n < 16; n++) { 1763 for (size_t k = 1; k <= 40; k += 9) { 1764 GemmMicrokernelTester() 1765 .mr(3) 1766 .nr(8) 1767 .kr(1) 1768 .sr(1) 1769 .m(3) 1770 .n(n) 1771 .k(k) 1772 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1773 } 1774 } 1775 } 1776 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,n_gt_8_strided_cn)1777 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8_strided_cn) { 1778 TEST_REQUIRES_ARM_NEON; 1779 for (uint32_t n = 9; n < 16; n++) { 1780 for (size_t k = 1; k <= 40; k += 9) { 1781 GemmMicrokernelTester() 1782 .mr(3) 1783 .nr(8) 1784 .kr(1) 1785 .sr(1) 1786 
.m(3) 1787 .n(n) 1788 .k(k) 1789 .cn_stride(11) 1790 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1791 } 1792 } 1793 } 1794 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,n_gt_8_subtile)1795 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8_subtile) { 1796 TEST_REQUIRES_ARM_NEON; 1797 for (uint32_t n = 9; n < 16; n++) { 1798 for (size_t k = 1; k <= 40; k += 9) { 1799 for (uint32_t m = 1; m <= 3; m++) { 1800 GemmMicrokernelTester() 1801 .mr(3) 1802 .nr(8) 1803 .kr(1) 1804 .sr(1) 1805 .m(m) 1806 .n(n) 1807 .k(k) 1808 .iterations(1) 1809 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1810 } 1811 } 1812 } 1813 } 1814 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,n_div_8)1815 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8) { 1816 TEST_REQUIRES_ARM_NEON; 1817 for (uint32_t n = 16; n <= 24; n += 8) { 1818 for (size_t k = 1; k <= 40; k += 9) { 1819 GemmMicrokernelTester() 1820 .mr(3) 1821 .nr(8) 1822 .kr(1) 1823 .sr(1) 1824 .m(3) 1825 .n(n) 1826 .k(k) 1827 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1828 } 1829 } 1830 } 1831 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,n_div_8_strided_cn)1832 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8_strided_cn) { 1833 TEST_REQUIRES_ARM_NEON; 1834 for (uint32_t n = 16; n <= 24; n += 8) { 1835 for (size_t k = 1; k <= 40; k += 9) { 1836 GemmMicrokernelTester() 1837 .mr(3) 1838 .nr(8) 1839 .kr(1) 1840 .sr(1) 1841 .m(3) 1842 .n(n) 1843 .k(k) 1844 .cn_stride(11) 1845 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1846 } 1847 } 1848 } 1849 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,n_div_8_subtile)1850 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, 
n_div_8_subtile) { 1851 TEST_REQUIRES_ARM_NEON; 1852 for (uint32_t n = 16; n <= 24; n += 8) { 1853 for (size_t k = 1; k <= 40; k += 9) { 1854 for (uint32_t m = 1; m <= 3; m++) { 1855 GemmMicrokernelTester() 1856 .mr(3) 1857 .nr(8) 1858 .kr(1) 1859 .sr(1) 1860 .m(m) 1861 .n(n) 1862 .k(k) 1863 .iterations(1) 1864 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1865 } 1866 } 1867 } 1868 } 1869 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,small_kernel)1870 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, small_kernel) { 1871 TEST_REQUIRES_ARM_NEON; 1872 for (size_t k = 1; k <= 40; k += 9) { 1873 GemmMicrokernelTester() 1874 .mr(3) 1875 .nr(8) 1876 .kr(1) 1877 .sr(1) 1878 .m(3) 1879 .n(8) 1880 .k(k) 1881 .ks(3) 1882 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1883 } 1884 } 1885 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,small_kernel_subtile)1886 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, small_kernel_subtile) { 1887 TEST_REQUIRES_ARM_NEON; 1888 for (size_t k = 1; k <= 40; k += 9) { 1889 for (uint32_t n = 1; n <= 8; n++) { 1890 for (uint32_t m = 1; m <= 3; m++) { 1891 GemmMicrokernelTester() 1892 .mr(3) 1893 .nr(8) 1894 .kr(1) 1895 .sr(1) 1896 .m(m) 1897 .n(n) 1898 .k(k) 1899 .ks(3) 1900 .iterations(1) 1901 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1902 } 1903 } 1904 } 1905 } 1906 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,n_gt_8_small_kernel)1907 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8_small_kernel) { 1908 TEST_REQUIRES_ARM_NEON; 1909 for (uint32_t n = 9; n < 16; n++) { 1910 for (size_t k = 1; k <= 40; k += 9) { 1911 GemmMicrokernelTester() 1912 .mr(3) 1913 .nr(8) 1914 .kr(1) 1915 .sr(1) 1916 .m(3) 1917 .n(n) 1918 .k(k) 1919 .ks(3) 1920 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1921 } 1922 } 1923 } 1924 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,n_div_8_small_kernel)1925 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8_small_kernel) { 1926 TEST_REQUIRES_ARM_NEON; 1927 for (uint32_t n = 16; n <= 24; n += 8) { 1928 for (size_t k = 1; k <= 40; k += 9) { 1929 GemmMicrokernelTester() 1930 .mr(3) 1931 .nr(8) 1932 .kr(1) 1933 .sr(1) 1934 .m(3) 1935 .n(n) 1936 .k(k) 1937 .ks(3) 1938 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1939 } 1940 } 1941 } 1942 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,strided_cm_subtile)1943 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, strided_cm_subtile) { 1944 TEST_REQUIRES_ARM_NEON; 1945 for (size_t k = 1; k <= 40; k += 9) { 1946 for (uint32_t n = 1; n <= 8; n++) { 1947 for (uint32_t m = 1; m <= 3; m++) { 1948 GemmMicrokernelTester() 1949 .mr(3) 1950 .nr(8) 1951 .kr(1) 1952 .sr(1) 1953 .m(m) 1954 .n(n) 1955 .k(k) 1956 .cm_stride(11) 1957 .iterations(1) 1958 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1959 } 1960 } 1961 } 1962 } 1963 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,a_offset)1964 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, a_offset) { 1965 TEST_REQUIRES_ARM_NEON; 1966 for (size_t k = 1; k <= 40; k += 9) { 1967 GemmMicrokernelTester() 1968 .mr(3) 1969 .nr(8) 1970 .kr(1) 1971 .sr(1) 1972 .m(3) 1973 .n(8) 1974 .k(k) 1975 .ks(3) 1976 .a_offset(127) 1977 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1978 } 1979 } 1980 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,zero)1981 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, zero) { 1982 TEST_REQUIRES_ARM_NEON; 1983 for (size_t k = 1; k <= 40; 
k += 9) { 1984 for (uint32_t mz = 0; mz < 3; mz++) { 1985 GemmMicrokernelTester() 1986 .mr(3) 1987 .nr(8) 1988 .kr(1) 1989 .sr(1) 1990 .m(3) 1991 .n(8) 1992 .k(k) 1993 .ks(3) 1994 .a_offset(127) 1995 .zero_index(mz) 1996 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1997 } 1998 } 1999 } 2000 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,qmin)2001 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, qmin) { 2002 TEST_REQUIRES_ARM_NEON; 2003 GemmMicrokernelTester() 2004 .mr(3) 2005 .nr(8) 2006 .kr(1) 2007 .sr(1) 2008 .m(3) 2009 .n(8) 2010 .k(8) 2011 .qmin(128) 2012 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2013 } 2014 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,qmax)2015 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, qmax) { 2016 TEST_REQUIRES_ARM_NEON; 2017 GemmMicrokernelTester() 2018 .mr(3) 2019 .nr(8) 2020 .kr(1) 2021 .sr(1) 2022 .m(3) 2023 .n(8) 2024 .k(8) 2025 .qmax(128) 2026 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2027 } 2028 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,strided_cm)2029 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, strided_cm) { 2030 TEST_REQUIRES_ARM_NEON; 2031 GemmMicrokernelTester() 2032 .mr(3) 2033 .nr(8) 2034 .kr(1) 2035 .sr(1) 2036 .m(3) 2037 .n(8) 2038 .k(8) 2039 .cm_stride(11) 2040 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2041 } 2042 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,no_a_zero_point)2043 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, no_a_zero_point) { 2044 TEST_REQUIRES_ARM_NEON; 2045 for (size_t k = 1; k <= 40; k += 9) { 2046 GemmMicrokernelTester() 2047 .mr(3) 2048 .nr(8) 2049 .kr(1) 2050 .sr(1) 2051 .m(3) 2052 .n(8) 2053 .k(k) 2054 .a_zero_point(0) 
2055 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2056 } 2057 } 2058 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,no_b_zero_point)2059 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, no_b_zero_point) { 2060 TEST_REQUIRES_ARM_NEON; 2061 for (size_t k = 1; k <= 40; k += 9) { 2062 GemmMicrokernelTester() 2063 .mr(3) 2064 .nr(8) 2065 .kr(1) 2066 .sr(1) 2067 .m(3) 2068 .n(8) 2069 .k(k) 2070 .b_zero_point(0) 2071 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2072 } 2073 } 2074 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE,no_zero_point)2075 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, no_zero_point) { 2076 TEST_REQUIRES_ARM_NEON; 2077 for (size_t k = 1; k <= 40; k += 9) { 2078 GemmMicrokernelTester() 2079 .mr(3) 2080 .nr(8) 2081 .kr(1) 2082 .sr(1) 2083 .m(3) 2084 .n(8) 2085 .k(k) 2086 .a_zero_point(0) 2087 .b_zero_point(0) 2088 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2089 } 2090 } 2091 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2092 2093 2094 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_eq_8)2095 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8) { 2096 TEST_REQUIRES_ARM_NEON; 2097 GemmMicrokernelTester() 2098 .mr(6) 2099 .nr(8) 2100 .kr(1) 2101 .sr(1) 2102 .m(6) 2103 .n(8) 2104 .k(8) 2105 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2106 } 2107 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,strided_cn)2108 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cn) { 2109 TEST_REQUIRES_ARM_NEON; 2110 GemmMicrokernelTester() 2111 .mr(6) 2112 .nr(8) 2113 .kr(1) 2114 .sr(1) 2115 .m(6) 2116 .n(8) 2117 .k(8) 2118 .cn_stride(11) 2119 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2120 } 2121 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_eq_8_subtile)2122 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile) { 2123 TEST_REQUIRES_ARM_NEON; 2124 for (uint32_t n = 1; n <= 8; n++) { 2125 for (uint32_t m = 1; m <= 6; m++) { 2126 GemmMicrokernelTester() 2127 .mr(6) 2128 .nr(8) 2129 .kr(1) 2130 .sr(1) 2131 .m(m) 2132 .n(n) 2133 .k(8) 2134 .iterations(1) 2135 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2136 } 2137 } 2138 } 2139 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_eq_8_subtile_m)2140 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) { 2141 TEST_REQUIRES_ARM_NEON; 2142 for (uint32_t m = 1; m <= 6; m++) { 2143 GemmMicrokernelTester() 2144 .mr(6) 2145 .nr(8) 2146 .kr(1) 2147 .sr(1) 2148 .m(m) 2149 .n(8) 2150 .k(8) 2151 .iterations(1) 2152 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2153 } 2154 } 2155 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_eq_8_subtile_n)2156 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) { 2157 TEST_REQUIRES_ARM_NEON; 2158 for (uint32_t n = 1; n <= 8; n++) { 2159 GemmMicrokernelTester() 2160 .mr(6) 2161 .nr(8) 2162 .kr(1) 2163 .sr(1) 2164 .m(6) 2165 .n(n) 2166 .k(8) 2167 .iterations(1) 2168 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2169 } 2170 } 2171 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_lt_8)2172 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8) { 2173 TEST_REQUIRES_ARM_NEON; 2174 for (size_t k = 1; k < 8; k++) { 2175 GemmMicrokernelTester() 2176 .mr(6) 2177 .nr(8) 2178 .kr(1) 2179 .sr(1) 2180 .m(6) 2181 .n(8) 2182 .k(k) 2183 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2184 } 2185 } 2186 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_lt_8_subtile)2187 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8_subtile) { 2188 TEST_REQUIRES_ARM_NEON; 2189 for (size_t k = 1; k < 8; k++) { 2190 for (uint32_t n = 1; n <= 8; n++) { 2191 for (uint32_t m = 1; m <= 6; m++) { 2192 GemmMicrokernelTester() 2193 .mr(6) 2194 .nr(8) 2195 .kr(1) 2196 .sr(1) 2197 .m(m) 2198 .n(n) 2199 .k(k) 2200 .iterations(1) 2201 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2202 } 2203 } 2204 } 2205 } 2206 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_gt_8)2207 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8) { 2208 TEST_REQUIRES_ARM_NEON; 2209 for (size_t k = 9; k < 16; k++) { 2210 GemmMicrokernelTester() 2211 .mr(6) 2212 .nr(8) 2213 .kr(1) 2214 .sr(1) 2215 .m(6) 2216 .n(8) 2217 .k(k) 2218 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2219 } 2220 } 2221 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_gt_8_subtile)2222 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8_subtile) { 2223 TEST_REQUIRES_ARM_NEON; 2224 for (size_t k = 9; k < 16; k++) { 2225 for (uint32_t n = 1; n <= 8; n++) { 2226 for (uint32_t m = 1; m <= 6; m++) { 2227 GemmMicrokernelTester() 2228 .mr(6) 2229 .nr(8) 2230 .kr(1) 2231 .sr(1) 2232 .m(m) 2233 .n(n) 2234 .k(k) 2235 .iterations(1) 2236 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2237 } 2238 } 2239 } 2240 } 2241 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_div_8)2242 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8) { 2243 TEST_REQUIRES_ARM_NEON; 2244 for (size_t k = 16; k <= 80; k += 8) { 2245 GemmMicrokernelTester() 
2246 .mr(6) 2247 .nr(8) 2248 .kr(1) 2249 .sr(1) 2250 .m(6) 2251 .n(8) 2252 .k(k) 2253 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2254 } 2255 } 2256 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_div_8_subtile)2257 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8_subtile) { 2258 TEST_REQUIRES_ARM_NEON; 2259 for (size_t k = 16; k <= 80; k += 8) { 2260 for (uint32_t n = 1; n <= 8; n++) { 2261 for (uint32_t m = 1; m <= 6; m++) { 2262 GemmMicrokernelTester() 2263 .mr(6) 2264 .nr(8) 2265 .kr(1) 2266 .sr(1) 2267 .m(m) 2268 .n(n) 2269 .k(k) 2270 .iterations(1) 2271 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2272 } 2273 } 2274 } 2275 } 2276 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_gt_8)2277 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8) { 2278 TEST_REQUIRES_ARM_NEON; 2279 for (uint32_t n = 9; n < 16; n++) { 2280 for (size_t k = 1; k <= 40; k += 9) { 2281 GemmMicrokernelTester() 2282 .mr(6) 2283 .nr(8) 2284 .kr(1) 2285 .sr(1) 2286 .m(6) 2287 .n(n) 2288 .k(k) 2289 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2290 } 2291 } 2292 } 2293 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_gt_8_strided_cn)2294 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) { 2295 TEST_REQUIRES_ARM_NEON; 2296 for (uint32_t n = 9; n < 16; n++) { 2297 for (size_t k = 1; k <= 40; k += 9) { 2298 GemmMicrokernelTester() 2299 .mr(6) 2300 .nr(8) 2301 .kr(1) 2302 .sr(1) 2303 .m(6) 2304 .n(n) 2305 .k(k) 2306 .cn_stride(11) 2307 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2308 } 2309 } 2310 } 2311 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_gt_8_subtile)2312 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_subtile) { 2313 TEST_REQUIRES_ARM_NEON; 2314 for (uint32_t n = 9; n < 16; n++) { 2315 for (size_t k = 1; k <= 40; k += 9) { 2316 for (uint32_t m = 1; m <= 6; m++) { 2317 GemmMicrokernelTester() 2318 .mr(6) 2319 .nr(8) 2320 .kr(1) 2321 .sr(1) 2322 .m(m) 2323 .n(n) 2324 .k(k) 2325 .iterations(1) 2326 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2327 } 2328 } 2329 } 2330 } 2331 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_div_8)2332 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8) { 2333 TEST_REQUIRES_ARM_NEON; 2334 for (uint32_t n = 16; n <= 24; n += 8) { 2335 for (size_t k = 1; k <= 40; k += 9) { 2336 GemmMicrokernelTester() 2337 .mr(6) 2338 .nr(8) 2339 .kr(1) 2340 .sr(1) 2341 .m(6) 2342 .n(n) 2343 .k(k) 2344 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2345 } 2346 } 2347 } 2348 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_div_8_strided_cn)2349 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) { 2350 TEST_REQUIRES_ARM_NEON; 2351 for (uint32_t n = 16; n <= 24; n += 8) { 2352 for (size_t k = 1; k <= 40; k += 9) { 2353 GemmMicrokernelTester() 2354 .mr(6) 2355 .nr(8) 2356 .kr(1) 2357 .sr(1) 2358 .m(6) 2359 .n(n) 2360 .k(k) 2361 .cn_stride(11) 2362 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2363 } 2364 } 2365 } 2366 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_div_8_subtile)2367 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_subtile) { 2368 TEST_REQUIRES_ARM_NEON; 2369 for (uint32_t n = 16; n <= 24; n += 8) { 2370 for (size_t k = 1; k <= 40; k += 9) { 2371 for (uint32_t m = 1; m <= 6; m++) { 2372 GemmMicrokernelTester() 2373 .mr(6) 2374 .nr(8) 2375 .kr(1) 2376 .sr(1) 2377 .m(m) 2378 .n(n) 2379 .k(k) 
2380 .iterations(1) 2381 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2382 } 2383 } 2384 } 2385 } 2386 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,small_kernel)2387 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, small_kernel) { 2388 TEST_REQUIRES_ARM_NEON; 2389 for (size_t k = 1; k <= 40; k += 9) { 2390 GemmMicrokernelTester() 2391 .mr(6) 2392 .nr(8) 2393 .kr(1) 2394 .sr(1) 2395 .m(6) 2396 .n(8) 2397 .k(k) 2398 .ks(3) 2399 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2400 } 2401 } 2402 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,small_kernel_subtile)2403 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, small_kernel_subtile) { 2404 TEST_REQUIRES_ARM_NEON; 2405 for (size_t k = 1; k <= 40; k += 9) { 2406 for (uint32_t n = 1; n <= 8; n++) { 2407 for (uint32_t m = 1; m <= 6; m++) { 2408 GemmMicrokernelTester() 2409 .mr(6) 2410 .nr(8) 2411 .kr(1) 2412 .sr(1) 2413 .m(m) 2414 .n(n) 2415 .k(k) 2416 .ks(3) 2417 .iterations(1) 2418 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2419 } 2420 } 2421 } 2422 } 2423 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_gt_8_small_kernel)2424 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_small_kernel) { 2425 TEST_REQUIRES_ARM_NEON; 2426 for (uint32_t n = 9; n < 16; n++) { 2427 for (size_t k = 1; k <= 40; k += 9) { 2428 GemmMicrokernelTester() 2429 .mr(6) 2430 .nr(8) 2431 .kr(1) 2432 .sr(1) 2433 .m(6) 2434 .n(n) 2435 .k(k) 2436 .ks(3) 2437 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2438 } 2439 } 2440 } 2441 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_div_8_small_kernel)2442 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_small_kernel) { 2443 
TEST_REQUIRES_ARM_NEON; 2444 for (uint32_t n = 16; n <= 24; n += 8) { 2445 for (size_t k = 1; k <= 40; k += 9) { 2446 GemmMicrokernelTester() 2447 .mr(6) 2448 .nr(8) 2449 .kr(1) 2450 .sr(1) 2451 .m(6) 2452 .n(n) 2453 .k(k) 2454 .ks(3) 2455 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2456 } 2457 } 2458 } 2459 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,strided_cm_subtile)2460 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cm_subtile) { 2461 TEST_REQUIRES_ARM_NEON; 2462 for (size_t k = 1; k <= 40; k += 9) { 2463 for (uint32_t n = 1; n <= 8; n++) { 2464 for (uint32_t m = 1; m <= 6; m++) { 2465 GemmMicrokernelTester() 2466 .mr(6) 2467 .nr(8) 2468 .kr(1) 2469 .sr(1) 2470 .m(m) 2471 .n(n) 2472 .k(k) 2473 .cm_stride(11) 2474 .iterations(1) 2475 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2476 } 2477 } 2478 } 2479 } 2480 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,a_offset)2481 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, a_offset) { 2482 TEST_REQUIRES_ARM_NEON; 2483 for (size_t k = 1; k <= 40; k += 9) { 2484 GemmMicrokernelTester() 2485 .mr(6) 2486 .nr(8) 2487 .kr(1) 2488 .sr(1) 2489 .m(6) 2490 .n(8) 2491 .k(k) 2492 .ks(3) 2493 .a_offset(251) 2494 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2495 } 2496 } 2497 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,zero)2498 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, zero) { 2499 TEST_REQUIRES_ARM_NEON; 2500 for (size_t k = 1; k <= 40; k += 9) { 2501 for (uint32_t mz = 0; mz < 6; mz++) { 2502 GemmMicrokernelTester() 2503 .mr(6) 2504 .nr(8) 2505 .kr(1) 2506 .sr(1) 2507 .m(6) 2508 .n(8) 2509 .k(k) 2510 .ks(3) 2511 .a_offset(251) 2512 .zero_index(mz) 2513 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2514 } 2515 } 2516 } 2517 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,qmin)2518 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, qmin) { 2519 TEST_REQUIRES_ARM_NEON; 2520 GemmMicrokernelTester() 2521 .mr(6) 2522 .nr(8) 2523 .kr(1) 2524 .sr(1) 2525 .m(6) 2526 .n(8) 2527 .k(8) 2528 .qmin(128) 2529 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2530 } 2531 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,qmax)2532 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, qmax) { 2533 TEST_REQUIRES_ARM_NEON; 2534 GemmMicrokernelTester() 2535 .mr(6) 2536 .nr(8) 2537 .kr(1) 2538 .sr(1) 2539 .m(6) 2540 .n(8) 2541 .k(8) 2542 .qmax(128) 2543 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2544 } 2545 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,strided_cm)2546 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cm) { 2547 TEST_REQUIRES_ARM_NEON; 2548 GemmMicrokernelTester() 2549 .mr(6) 2550 .nr(8) 2551 .kr(1) 2552 .sr(1) 2553 .m(6) 2554 .n(8) 2555 .k(8) 2556 .cm_stride(11) 2557 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2558 } 2559 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,no_a_zero_point)2560 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, no_a_zero_point) { 2561 TEST_REQUIRES_ARM_NEON; 2562 for (size_t k = 1; k <= 40; k += 9) { 2563 GemmMicrokernelTester() 2564 .mr(6) 2565 .nr(8) 2566 .kr(1) 2567 .sr(1) 2568 .m(6) 2569 .n(8) 2570 .k(k) 2571 .a_zero_point(0) 2572 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2573 } 2574 } 2575 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,no_b_zero_point)2576 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, 
no_b_zero_point) { 2577 TEST_REQUIRES_ARM_NEON; 2578 for (size_t k = 1; k <= 40; k += 9) { 2579 GemmMicrokernelTester() 2580 .mr(6) 2581 .nr(8) 2582 .kr(1) 2583 .sr(1) 2584 .m(6) 2585 .n(8) 2586 .k(k) 2587 .b_zero_point(0) 2588 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2589 } 2590 } 2591 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,no_zero_point)2592 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, no_zero_point) { 2593 TEST_REQUIRES_ARM_NEON; 2594 for (size_t k = 1; k <= 40; k += 9) { 2595 GemmMicrokernelTester() 2596 .mr(6) 2597 .nr(8) 2598 .kr(1) 2599 .sr(1) 2600 .m(6) 2601 .n(8) 2602 .k(k) 2603 .a_zero_point(0) 2604 .b_zero_point(0) 2605 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2606 } 2607 } 2608 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2609 2610 2611 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8)2612 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8) { 2613 TEST_REQUIRES_ARM_NEON; 2614 GemmMicrokernelTester() 2615 .mr(4) 2616 .nr(16) 2617 .kr(1) 2618 .sr(1) 2619 .m(4) 2620 .n(16) 2621 .k(8) 2622 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2623 } 2624 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,strided_cn)2625 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cn) { 2626 TEST_REQUIRES_ARM_NEON; 2627 GemmMicrokernelTester() 2628 .mr(4) 2629 .nr(16) 2630 .kr(1) 2631 .sr(1) 2632 .m(4) 2633 .n(16) 2634 .k(8) 2635 .cn_stride(19) 2636 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2637 } 2638 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile)2639 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile) { 2640 
TEST_REQUIRES_ARM_NEON; 2641 for (uint32_t n = 1; n <= 16; n++) { 2642 for (uint32_t m = 1; m <= 4; m++) { 2643 GemmMicrokernelTester() 2644 .mr(4) 2645 .nr(16) 2646 .kr(1) 2647 .sr(1) 2648 .m(m) 2649 .n(n) 2650 .k(8) 2651 .iterations(1) 2652 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2653 } 2654 } 2655 } 2656 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile_m)2657 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 2658 TEST_REQUIRES_ARM_NEON; 2659 for (uint32_t m = 1; m <= 4; m++) { 2660 GemmMicrokernelTester() 2661 .mr(4) 2662 .nr(16) 2663 .kr(1) 2664 .sr(1) 2665 .m(m) 2666 .n(16) 2667 .k(8) 2668 .iterations(1) 2669 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2670 } 2671 } 2672 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile_n)2673 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 2674 TEST_REQUIRES_ARM_NEON; 2675 for (uint32_t n = 1; n <= 16; n++) { 2676 GemmMicrokernelTester() 2677 .mr(4) 2678 .nr(16) 2679 .kr(1) 2680 .sr(1) 2681 .m(4) 2682 .n(n) 2683 .k(8) 2684 .iterations(1) 2685 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2686 } 2687 } 2688 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_lt_8)2689 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8) { 2690 TEST_REQUIRES_ARM_NEON; 2691 for (size_t k = 1; k < 8; k++) { 2692 GemmMicrokernelTester() 2693 .mr(4) 2694 .nr(16) 2695 .kr(1) 2696 .sr(1) 2697 .m(4) 2698 .n(16) 2699 .k(k) 2700 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2701 } 2702 } 2703 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_lt_8_subtile)2704 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_subtile) { 
2705 TEST_REQUIRES_ARM_NEON; 2706 for (size_t k = 1; k < 8; k++) { 2707 for (uint32_t n = 1; n <= 16; n++) { 2708 for (uint32_t m = 1; m <= 4; m++) { 2709 GemmMicrokernelTester() 2710 .mr(4) 2711 .nr(16) 2712 .kr(1) 2713 .sr(1) 2714 .m(m) 2715 .n(n) 2716 .k(k) 2717 .iterations(1) 2718 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2719 } 2720 } 2721 } 2722 } 2723 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_gt_8)2724 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8) { 2725 TEST_REQUIRES_ARM_NEON; 2726 for (size_t k = 9; k < 16; k++) { 2727 GemmMicrokernelTester() 2728 .mr(4) 2729 .nr(16) 2730 .kr(1) 2731 .sr(1) 2732 .m(4) 2733 .n(16) 2734 .k(k) 2735 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2736 } 2737 } 2738 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_gt_8_subtile)2739 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_subtile) { 2740 TEST_REQUIRES_ARM_NEON; 2741 for (size_t k = 9; k < 16; k++) { 2742 for (uint32_t n = 1; n <= 16; n++) { 2743 for (uint32_t m = 1; m <= 4; m++) { 2744 GemmMicrokernelTester() 2745 .mr(4) 2746 .nr(16) 2747 .kr(1) 2748 .sr(1) 2749 .m(m) 2750 .n(n) 2751 .k(k) 2752 .iterations(1) 2753 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2754 } 2755 } 2756 } 2757 } 2758 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_div_8)2759 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8) { 2760 TEST_REQUIRES_ARM_NEON; 2761 for (size_t k = 16; k <= 80; k += 8) { 2762 GemmMicrokernelTester() 2763 .mr(4) 2764 .nr(16) 2765 .kr(1) 2766 .sr(1) 2767 .m(4) 2768 .n(16) 2769 .k(k) 2770 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2771 } 2772 } 2773 
// 4x16 kernel: k = 16, 24, ..., 80, every n = 1..16 and m = 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 4x16 kernel: n = 17..31, k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 4x16 kernel: n = 17..31, k in {1, 10, 19, 28, 37}, every m = 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 4x16 kernel: n in {32, 48}, k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride(19) set on the tester.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 4x16 kernel: n in {32, 48}, k in {1, 10, 19, 28, 37}, every m = 1..4,
// one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
// 4x16 kernel: full 4x16 tile, k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 4x16 kernel with ks(3): k in {1, 10, 19, 28, 37}, every n = 1..16 and
// m = 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 4x16 kernel with ks(3): n = 17..31, k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 4x16 kernel with ks(3): n in {32, 48}, k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// 4x16 kernel with cm_stride(19): k in {1, 10, 19, 28, 37}, every n = 1..16
// and m = 1..4, one iteration per configuration.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 4x16 kernel: full tile, k in {1, 10, 19, 28, 37}, with ks(3) and a_offset(163).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same setup as a_offset, additionally passing each mz = 0..3 to zero_index().
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmin) { 3036 TEST_REQUIRES_ARM_NEON; 3037 GemmMicrokernelTester() 3038 .mr(4) 3039 .nr(16) 3040 .kr(1) 3041 .sr(1) 3042 .m(4) 3043 .n(16) 3044 .k(8) 3045 .qmin(128) 3046 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3047 } 3048 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,qmax)3049 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmax) { 3050 TEST_REQUIRES_ARM_NEON; 3051 GemmMicrokernelTester() 3052 .mr(4) 3053 .nr(16) 3054 .kr(1) 3055 .sr(1) 3056 .m(4) 3057 .n(16) 3058 .k(8) 3059 .qmax(128) 3060 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3061 } 3062 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,strided_cm)3063 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm) { 3064 TEST_REQUIRES_ARM_NEON; 3065 GemmMicrokernelTester() 3066 .mr(4) 3067 .nr(16) 3068 .kr(1) 3069 .sr(1) 3070 .m(4) 3071 .n(16) 3072 .k(8) 3073 .cm_stride(19) 3074 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3075 } 3076 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,no_a_zero_point)3077 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_a_zero_point) { 3078 TEST_REQUIRES_ARM_NEON; 3079 for (size_t k = 1; k <= 40; k += 9) { 3080 GemmMicrokernelTester() 3081 .mr(4) 3082 .nr(16) 3083 .kr(1) 3084 .sr(1) 3085 .m(4) 3086 .n(16) 3087 .k(k) 3088 .a_zero_point(0) 3089 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3090 } 3091 } 3092 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,no_b_zero_point)3093 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_b_zero_point) { 3094 TEST_REQUIRES_ARM_NEON; 3095 for (size_t k = 1; k <= 40; k += 9) { 3096 GemmMicrokernelTester() 3097 .mr(4) 3098 .nr(16) 
3099 .kr(1) 3100 .sr(1) 3101 .m(4) 3102 .n(16) 3103 .k(k) 3104 .b_zero_point(0) 3105 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3106 } 3107 } 3108 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,no_zero_point)3109 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_zero_point) { 3110 TEST_REQUIRES_ARM_NEON; 3111 for (size_t k = 1; k <= 40; k += 9) { 3112 GemmMicrokernelTester() 3113 .mr(4) 3114 .nr(16) 3115 .kr(1) 3116 .sr(1) 3117 .m(4) 3118 .n(16) 3119 .k(k) 3120 .a_zero_point(0) 3121 .b_zero_point(0) 3122 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3123 } 3124 } 3125 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3126 3127 3128 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_eq_8)3129 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8) { 3130 TEST_REQUIRES_ARM_NEON; 3131 GemmMicrokernelTester() 3132 .mr(6) 3133 .nr(16) 3134 .kr(1) 3135 .sr(1) 3136 .m(6) 3137 .n(16) 3138 .k(8) 3139 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3140 } 3141 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,strided_cn)3142 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cn) { 3143 TEST_REQUIRES_ARM_NEON; 3144 GemmMicrokernelTester() 3145 .mr(6) 3146 .nr(16) 3147 .kr(1) 3148 .sr(1) 3149 .m(6) 3150 .n(16) 3151 .k(8) 3152 .cn_stride(19) 3153 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3154 } 3155 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_eq_8_subtile)3156 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile) { 3157 TEST_REQUIRES_ARM_NEON; 3158 for (uint32_t n = 1; n <= 16; n++) { 3159 for (uint32_t m = 1; m <= 6; m++) { 3160 GemmMicrokernelTester() 3161 .mr(6) 3162 
.nr(16) 3163 .kr(1) 3164 .sr(1) 3165 .m(m) 3166 .n(n) 3167 .k(8) 3168 .iterations(1) 3169 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3170 } 3171 } 3172 } 3173 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_eq_8_subtile_m)3174 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 3175 TEST_REQUIRES_ARM_NEON; 3176 for (uint32_t m = 1; m <= 6; m++) { 3177 GemmMicrokernelTester() 3178 .mr(6) 3179 .nr(16) 3180 .kr(1) 3181 .sr(1) 3182 .m(m) 3183 .n(16) 3184 .k(8) 3185 .iterations(1) 3186 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3187 } 3188 } 3189 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_eq_8_subtile_n)3190 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 3191 TEST_REQUIRES_ARM_NEON; 3192 for (uint32_t n = 1; n <= 16; n++) { 3193 GemmMicrokernelTester() 3194 .mr(6) 3195 .nr(16) 3196 .kr(1) 3197 .sr(1) 3198 .m(6) 3199 .n(n) 3200 .k(8) 3201 .iterations(1) 3202 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3203 } 3204 } 3205 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_lt_8)3206 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8) { 3207 TEST_REQUIRES_ARM_NEON; 3208 for (size_t k = 1; k < 8; k++) { 3209 GemmMicrokernelTester() 3210 .mr(6) 3211 .nr(16) 3212 .kr(1) 3213 .sr(1) 3214 .m(6) 3215 .n(16) 3216 .k(k) 3217 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3218 } 3219 } 3220 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_lt_8_subtile)3221 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8_subtile) { 3222 TEST_REQUIRES_ARM_NEON; 3223 for (size_t k = 1; k < 8; k++) { 3224 for (uint32_t n = 1; n <= 16; n++) { 3225 for (uint32_t m = 1; m <= 6; m++) { 
3226 GemmMicrokernelTester() 3227 .mr(6) 3228 .nr(16) 3229 .kr(1) 3230 .sr(1) 3231 .m(m) 3232 .n(n) 3233 .k(k) 3234 .iterations(1) 3235 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3236 } 3237 } 3238 } 3239 } 3240 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_gt_8)3241 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8) { 3242 TEST_REQUIRES_ARM_NEON; 3243 for (size_t k = 9; k < 16; k++) { 3244 GemmMicrokernelTester() 3245 .mr(6) 3246 .nr(16) 3247 .kr(1) 3248 .sr(1) 3249 .m(6) 3250 .n(16) 3251 .k(k) 3252 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3253 } 3254 } 3255 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_gt_8_subtile)3256 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8_subtile) { 3257 TEST_REQUIRES_ARM_NEON; 3258 for (size_t k = 9; k < 16; k++) { 3259 for (uint32_t n = 1; n <= 16; n++) { 3260 for (uint32_t m = 1; m <= 6; m++) { 3261 GemmMicrokernelTester() 3262 .mr(6) 3263 .nr(16) 3264 .kr(1) 3265 .sr(1) 3266 .m(m) 3267 .n(n) 3268 .k(k) 3269 .iterations(1) 3270 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3271 } 3272 } 3273 } 3274 } 3275 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_div_8)3276 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8) { 3277 TEST_REQUIRES_ARM_NEON; 3278 for (size_t k = 16; k <= 80; k += 8) { 3279 GemmMicrokernelTester() 3280 .mr(6) 3281 .nr(16) 3282 .kr(1) 3283 .sr(1) 3284 .m(6) 3285 .n(16) 3286 .k(k) 3287 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3288 } 3289 } 3290 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_div_8_subtile)3291 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8_subtile) { 3292 TEST_REQUIRES_ARM_NEON; 3293 for 
(size_t k = 16; k <= 80; k += 8) { 3294 for (uint32_t n = 1; n <= 16; n++) { 3295 for (uint32_t m = 1; m <= 6; m++) { 3296 GemmMicrokernelTester() 3297 .mr(6) 3298 .nr(16) 3299 .kr(1) 3300 .sr(1) 3301 .m(m) 3302 .n(n) 3303 .k(k) 3304 .iterations(1) 3305 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3306 } 3307 } 3308 } 3309 } 3310 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,n_gt_16)3311 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16) { 3312 TEST_REQUIRES_ARM_NEON; 3313 for (uint32_t n = 17; n < 32; n++) { 3314 for (size_t k = 1; k <= 40; k += 9) { 3315 GemmMicrokernelTester() 3316 .mr(6) 3317 .nr(16) 3318 .kr(1) 3319 .sr(1) 3320 .m(6) 3321 .n(n) 3322 .k(k) 3323 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3324 } 3325 } 3326 } 3327 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,n_gt_16_strided_cn)3328 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) { 3329 TEST_REQUIRES_ARM_NEON; 3330 for (uint32_t n = 17; n < 32; n++) { 3331 for (size_t k = 1; k <= 40; k += 9) { 3332 GemmMicrokernelTester() 3333 .mr(6) 3334 .nr(16) 3335 .kr(1) 3336 .sr(1) 3337 .m(6) 3338 .n(n) 3339 .k(k) 3340 .cn_stride(19) 3341 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3342 } 3343 } 3344 } 3345 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,n_gt_16_subtile)3346 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_subtile) { 3347 TEST_REQUIRES_ARM_NEON; 3348 for (uint32_t n = 17; n < 32; n++) { 3349 for (size_t k = 1; k <= 40; k += 9) { 3350 for (uint32_t m = 1; m <= 6; m++) { 3351 GemmMicrokernelTester() 3352 .mr(6) 3353 .nr(16) 3354 .kr(1) 3355 .sr(1) 3356 .m(m) 3357 .n(n) 3358 .k(k) 3359 .iterations(1) 3360 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3361 } 3362 } 3363 } 3364 } 3365 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,n_div_16)3366 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16) { 3367 TEST_REQUIRES_ARM_NEON; 3368 for (uint32_t n = 32; n <= 48; n += 16) { 3369 for (size_t k = 1; k <= 40; k += 9) { 3370 GemmMicrokernelTester() 3371 .mr(6) 3372 .nr(16) 3373 .kr(1) 3374 .sr(1) 3375 .m(6) 3376 .n(n) 3377 .k(k) 3378 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3379 } 3380 } 3381 } 3382 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,n_div_16_strided_cn)3383 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) { 3384 TEST_REQUIRES_ARM_NEON; 3385 for (uint32_t n = 32; n <= 48; n += 16) { 3386 for (size_t k = 1; k <= 40; k += 9) { 3387 GemmMicrokernelTester() 3388 .mr(6) 3389 .nr(16) 3390 .kr(1) 3391 .sr(1) 3392 .m(6) 3393 .n(n) 3394 .k(k) 3395 .cn_stride(19) 3396 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3397 } 3398 } 3399 } 3400 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,n_div_16_subtile)3401 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_subtile) { 3402 TEST_REQUIRES_ARM_NEON; 3403 for (uint32_t n = 32; n <= 48; n += 16) { 3404 for (size_t k = 1; k <= 40; k += 9) { 3405 for (uint32_t m = 1; m <= 6; m++) { 3406 GemmMicrokernelTester() 3407 .mr(6) 3408 .nr(16) 3409 .kr(1) 3410 .sr(1) 3411 .m(m) 3412 .n(n) 3413 .k(k) 3414 .iterations(1) 3415 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3416 } 3417 } 3418 } 3419 } 3420 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,small_kernel)3421 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel) { 3422 TEST_REQUIRES_ARM_NEON; 3423 for (size_t k = 1; k <= 40; k += 9) { 3424 
GemmMicrokernelTester() 3425 .mr(6) 3426 .nr(16) 3427 .kr(1) 3428 .sr(1) 3429 .m(6) 3430 .n(16) 3431 .k(k) 3432 .ks(3) 3433 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3434 } 3435 } 3436 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,small_kernel_subtile)3437 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel_subtile) { 3438 TEST_REQUIRES_ARM_NEON; 3439 for (size_t k = 1; k <= 40; k += 9) { 3440 for (uint32_t n = 1; n <= 16; n++) { 3441 for (uint32_t m = 1; m <= 6; m++) { 3442 GemmMicrokernelTester() 3443 .mr(6) 3444 .nr(16) 3445 .kr(1) 3446 .sr(1) 3447 .m(m) 3448 .n(n) 3449 .k(k) 3450 .ks(3) 3451 .iterations(1) 3452 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3453 } 3454 } 3455 } 3456 } 3457 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,n_gt_16_small_kernel)3458 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_small_kernel) { 3459 TEST_REQUIRES_ARM_NEON; 3460 for (uint32_t n = 17; n < 32; n++) { 3461 for (size_t k = 1; k <= 40; k += 9) { 3462 GemmMicrokernelTester() 3463 .mr(6) 3464 .nr(16) 3465 .kr(1) 3466 .sr(1) 3467 .m(6) 3468 .n(n) 3469 .k(k) 3470 .ks(3) 3471 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3472 } 3473 } 3474 } 3475 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,n_div_16_small_kernel)3476 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_small_kernel) { 3477 TEST_REQUIRES_ARM_NEON; 3478 for (uint32_t n = 32; n <= 48; n += 16) { 3479 for (size_t k = 1; k <= 40; k += 9) { 3480 GemmMicrokernelTester() 3481 .mr(6) 3482 .nr(16) 3483 .kr(1) 3484 .sr(1) 3485 .m(6) 3486 .n(n) 3487 .k(k) 3488 .ks(3) 3489 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3490 } 3491 } 3492 } 
3493 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,strided_cm_subtile)3494 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm_subtile) { 3495 TEST_REQUIRES_ARM_NEON; 3496 for (size_t k = 1; k <= 40; k += 9) { 3497 for (uint32_t n = 1; n <= 16; n++) { 3498 for (uint32_t m = 1; m <= 6; m++) { 3499 GemmMicrokernelTester() 3500 .mr(6) 3501 .nr(16) 3502 .kr(1) 3503 .sr(1) 3504 .m(m) 3505 .n(n) 3506 .k(k) 3507 .cm_stride(19) 3508 .iterations(1) 3509 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3510 } 3511 } 3512 } 3513 } 3514 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,a_offset)3515 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, a_offset) { 3516 TEST_REQUIRES_ARM_NEON; 3517 for (size_t k = 1; k <= 40; k += 9) { 3518 GemmMicrokernelTester() 3519 .mr(6) 3520 .nr(16) 3521 .kr(1) 3522 .sr(1) 3523 .m(6) 3524 .n(16) 3525 .k(k) 3526 .ks(3) 3527 .a_offset(251) 3528 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3529 } 3530 } 3531 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,zero)3532 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, zero) { 3533 TEST_REQUIRES_ARM_NEON; 3534 for (size_t k = 1; k <= 40; k += 9) { 3535 for (uint32_t mz = 0; mz < 6; mz++) { 3536 GemmMicrokernelTester() 3537 .mr(6) 3538 .nr(16) 3539 .kr(1) 3540 .sr(1) 3541 .m(6) 3542 .n(16) 3543 .k(k) 3544 .ks(3) 3545 .a_offset(251) 3546 .zero_index(mz) 3547 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3548 } 3549 } 3550 } 3551 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,qmin)3552 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmin) { 3553 TEST_REQUIRES_ARM_NEON; 3554 GemmMicrokernelTester() 3555 .mr(6) 3556 .nr(16) 3557 .kr(1) 3558 .sr(1) 3559 .m(6) 3560 .n(16) 3561 .k(8) 3562 .qmin(128) 3563 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3564 } 3565 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,qmax)3566 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmax) { 3567 TEST_REQUIRES_ARM_NEON; 3568 GemmMicrokernelTester() 3569 .mr(6) 3570 .nr(16) 3571 .kr(1) 3572 .sr(1) 3573 .m(6) 3574 .n(16) 3575 .k(8) 3576 .qmax(128) 3577 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3578 } 3579 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,strided_cm)3580 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm) { 3581 TEST_REQUIRES_ARM_NEON; 3582 GemmMicrokernelTester() 3583 .mr(6) 3584 .nr(16) 3585 .kr(1) 3586 .sr(1) 3587 .m(6) 3588 .n(16) 3589 .k(8) 3590 .cm_stride(19) 3591 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3592 } 3593 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,no_a_zero_point)3594 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_a_zero_point) { 3595 TEST_REQUIRES_ARM_NEON; 3596 for (size_t k = 1; k <= 40; k += 9) { 3597 GemmMicrokernelTester() 3598 .mr(6) 3599 .nr(16) 3600 .kr(1) 3601 .sr(1) 3602 .m(6) 3603 .n(16) 3604 .k(k) 3605 .a_zero_point(0) 3606 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3607 } 3608 } 3609 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,no_b_zero_point)3610 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_b_zero_point) { 3611 TEST_REQUIRES_ARM_NEON; 3612 for (size_t k = 1; k <= 40; k += 9) { 3613 GemmMicrokernelTester() 3614 .mr(6) 3615 .nr(16) 3616 .kr(1) 3617 .sr(1) 3618 .m(6) 3619 .n(16) 3620 .k(k) 3621 .b_zero_point(0) 3622 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 
3623 } 3624 } 3625 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,no_zero_point)3626 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_zero_point) { 3627 TEST_REQUIRES_ARM_NEON; 3628 for (size_t k = 1; k <= 40; k += 9) { 3629 GemmMicrokernelTester() 3630 .mr(6) 3631 .nr(16) 3632 .kr(1) 3633 .sr(1) 3634 .m(6) 3635 .n(16) 3636 .k(k) 3637 .a_zero_point(0) 3638 .b_zero_point(0) 3639 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3640 } 3641 } 3642 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3643 3644 3645 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_8)3646 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8) { 3647 TEST_REQUIRES_ARM_NEON_DOT; 3648 GemmMicrokernelTester() 3649 .mr(4) 3650 .nr(8) 3651 .kr(4) 3652 .sr(1) 3653 .m(4) 3654 .n(8) 3655 .k(8) 3656 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3657 } 3658 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,strided_cn)3659 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) { 3660 TEST_REQUIRES_ARM_NEON_DOT; 3661 GemmMicrokernelTester() 3662 .mr(4) 3663 .nr(8) 3664 .kr(4) 3665 .sr(1) 3666 .m(4) 3667 .n(8) 3668 .k(8) 3669 .cn_stride(11) 3670 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3671 } 3672 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_8_subtile)3673 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile) { 3674 TEST_REQUIRES_ARM_NEON_DOT; 3675 for (uint32_t n = 1; n <= 8; n++) { 3676 for (uint32_t m = 1; m <= 4; m++) { 3677 GemmMicrokernelTester() 3678 .mr(4) 3679 .nr(8) 3680 .kr(4) 3681 .sr(1) 3682 .m(m) 3683 .n(n) 3684 .k(8) 3685 .iterations(1) 3686 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3687 } 3688 } 3689 } 3690 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_8_subtile_m)3691 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile_m) { 3692 TEST_REQUIRES_ARM_NEON_DOT; 3693 for (uint32_t m = 1; m <= 4; m++) { 3694 GemmMicrokernelTester() 3695 .mr(4) 3696 .nr(8) 3697 .kr(4) 3698 .sr(1) 3699 .m(m) 3700 .n(8) 3701 .k(8) 3702 .iterations(1) 3703 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3704 } 3705 } 3706 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,k_eq_8_subtile_n)3707 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile_n) { 3708 TEST_REQUIRES_ARM_NEON_DOT; 3709 for (uint32_t n = 1; n <= 8; n++) { 3710 GemmMicrokernelTester() 3711 .mr(4) 3712 .nr(8) 3713 .kr(4) 3714 .sr(1) 3715 .m(4) 3716 .n(n) 3717 .k(8) 3718 .iterations(1) 3719 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3720 } 3721 } 3722 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,k_lt_8)3723 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_8) { 3724 TEST_REQUIRES_ARM_NEON_DOT; 3725 for (size_t k = 1; k < 8; k++) { 3726 GemmMicrokernelTester() 3727 .mr(4) 3728 .nr(8) 3729 .kr(4) 3730 .sr(1) 3731 .m(4) 3732 .n(8) 3733 .k(k) 3734 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3735 } 3736 } 3737 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,k_lt_8_subtile)3738 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_8_subtile) { 3739 TEST_REQUIRES_ARM_NEON_DOT; 3740 for (size_t k = 1; k < 8; k++) { 3741 for 
(uint32_t n = 1; n <= 8; n++) { 3742 for (uint32_t m = 1; m <= 4; m++) { 3743 GemmMicrokernelTester() 3744 .mr(4) 3745 .nr(8) 3746 .kr(4) 3747 .sr(1) 3748 .m(m) 3749 .n(n) 3750 .k(k) 3751 .iterations(1) 3752 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3753 } 3754 } 3755 } 3756 } 3757 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,k_gt_8)3758 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_8) { 3759 TEST_REQUIRES_ARM_NEON_DOT; 3760 for (size_t k = 9; k < 16; k++) { 3761 GemmMicrokernelTester() 3762 .mr(4) 3763 .nr(8) 3764 .kr(4) 3765 .sr(1) 3766 .m(4) 3767 .n(8) 3768 .k(k) 3769 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3770 } 3771 } 3772 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,k_gt_8_subtile)3773 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_8_subtile) { 3774 TEST_REQUIRES_ARM_NEON_DOT; 3775 for (size_t k = 9; k < 16; k++) { 3776 for (uint32_t n = 1; n <= 8; n++) { 3777 for (uint32_t m = 1; m <= 4; m++) { 3778 GemmMicrokernelTester() 3779 .mr(4) 3780 .nr(8) 3781 .kr(4) 3782 .sr(1) 3783 .m(m) 3784 .n(n) 3785 .k(k) 3786 .iterations(1) 3787 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3788 } 3789 } 3790 } 3791 } 3792 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,k_div_8)3793 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_div_8) { 3794 TEST_REQUIRES_ARM_NEON_DOT; 3795 for (size_t k = 16; k <= 80; k += 8) { 3796 GemmMicrokernelTester() 3797 .mr(4) 3798 .nr(8) 3799 .kr(4) 3800 .sr(1) 3801 .m(4) 3802 .n(8) 3803 .k(k) 3804 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 3805 } 3806 } 3807 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,k_div_8_subtile)3808 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_div_8_subtile) { 3809 TEST_REQUIRES_ARM_NEON_DOT; 3810 for (size_t k = 16; k <= 80; k += 8) { 3811 for (uint32_t n = 1; n <= 8; n++) { 3812 for (uint32_t m = 1; m <= 4; m++) { 3813 GemmMicrokernelTester() 3814 .mr(4) 3815 .nr(8) 3816 .kr(4) 3817 .sr(1) 3818 .m(m) 3819 .n(n) 3820 .k(k) 3821 .iterations(1) 3822 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3823 } 3824 } 3825 } 3826 } 3827 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,n_gt_8)3828 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8) { 3829 TEST_REQUIRES_ARM_NEON_DOT; 3830 for (uint32_t n = 9; n < 16; n++) { 3831 for (size_t k = 1; k <= 40; k += 9) { 3832 GemmMicrokernelTester() 3833 .mr(4) 3834 .nr(8) 3835 .kr(4) 3836 .sr(1) 3837 .m(4) 3838 .n(n) 3839 .k(k) 3840 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3841 } 3842 } 3843 } 3844 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,n_gt_8_strided_cn)3845 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_strided_cn) { 3846 TEST_REQUIRES_ARM_NEON_DOT; 3847 for (uint32_t n = 9; n < 16; n++) { 3848 for (size_t k = 1; k <= 40; k += 9) { 3849 GemmMicrokernelTester() 3850 .mr(4) 3851 .nr(8) 3852 .kr(4) 3853 .sr(1) 3854 .m(4) 3855 .n(n) 3856 .k(k) 3857 .cn_stride(11) 3858 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3859 } 3860 } 3861 } 3862 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,n_gt_8_subtile)3863 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_subtile) { 3864 
TEST_REQUIRES_ARM_NEON_DOT; 3865 for (uint32_t n = 9; n < 16; n++) { 3866 for (size_t k = 1; k <= 40; k += 9) { 3867 for (uint32_t m = 1; m <= 4; m++) { 3868 GemmMicrokernelTester() 3869 .mr(4) 3870 .nr(8) 3871 .kr(4) 3872 .sr(1) 3873 .m(m) 3874 .n(n) 3875 .k(k) 3876 .iterations(1) 3877 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3878 } 3879 } 3880 } 3881 } 3882 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,n_div_8)3883 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8) { 3884 TEST_REQUIRES_ARM_NEON_DOT; 3885 for (uint32_t n = 16; n <= 24; n += 8) { 3886 for (size_t k = 1; k <= 40; k += 9) { 3887 GemmMicrokernelTester() 3888 .mr(4) 3889 .nr(8) 3890 .kr(4) 3891 .sr(1) 3892 .m(4) 3893 .n(n) 3894 .k(k) 3895 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3896 } 3897 } 3898 } 3899 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,n_div_8_strided_cn)3900 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_strided_cn) { 3901 TEST_REQUIRES_ARM_NEON_DOT; 3902 for (uint32_t n = 16; n <= 24; n += 8) { 3903 for (size_t k = 1; k <= 40; k += 9) { 3904 GemmMicrokernelTester() 3905 .mr(4) 3906 .nr(8) 3907 .kr(4) 3908 .sr(1) 3909 .m(4) 3910 .n(n) 3911 .k(k) 3912 .cn_stride(11) 3913 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3914 } 3915 } 3916 } 3917 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,n_div_8_subtile)3918 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_subtile) { 3919 TEST_REQUIRES_ARM_NEON_DOT; 3920 for (uint32_t n = 16; n <= 24; n += 8) { 3921 for (size_t k = 1; k <= 40; k += 9) { 3922 for (uint32_t m = 1; m <= 4; m++) { 3923 GemmMicrokernelTester() 3924 .mr(4) 
3925 .nr(8) 3926 .kr(4) 3927 .sr(1) 3928 .m(m) 3929 .n(n) 3930 .k(k) 3931 .iterations(1) 3932 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3933 } 3934 } 3935 } 3936 } 3937 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,small_kernel)3938 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) { 3939 TEST_REQUIRES_ARM_NEON_DOT; 3940 for (size_t k = 1; k <= 40; k += 9) { 3941 GemmMicrokernelTester() 3942 .mr(4) 3943 .nr(8) 3944 .kr(4) 3945 .sr(1) 3946 .m(4) 3947 .n(8) 3948 .k(k) 3949 .ks(3) 3950 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3951 } 3952 } 3953 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,small_kernel_subtile)3954 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) { 3955 TEST_REQUIRES_ARM_NEON_DOT; 3956 for (size_t k = 1; k <= 40; k += 9) { 3957 for (uint32_t n = 1; n <= 8; n++) { 3958 for (uint32_t m = 1; m <= 4; m++) { 3959 GemmMicrokernelTester() 3960 .mr(4) 3961 .nr(8) 3962 .kr(4) 3963 .sr(1) 3964 .m(m) 3965 .n(n) 3966 .k(k) 3967 .ks(3) 3968 .iterations(1) 3969 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3970 } 3971 } 3972 } 3973 } 3974 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,n_gt_8_small_kernel)3975 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_small_kernel) { 3976 TEST_REQUIRES_ARM_NEON_DOT; 3977 for (uint32_t n = 9; n < 16; n++) { 3978 for (size_t k = 1; k <= 40; k += 9) { 3979 GemmMicrokernelTester() 3980 .mr(4) 3981 .nr(8) 3982 .kr(4) 3983 .sr(1) 3984 .m(4) 3985 .n(n) 3986 .k(k) 3987 .ks(3) 3988 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 3989 } 3990 } 3991 } 3992 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,n_div_8_small_kernel)3993 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_small_kernel) { 3994 TEST_REQUIRES_ARM_NEON_DOT; 3995 for (uint32_t n = 16; n <= 24; n += 8) { 3996 for (size_t k = 1; k <= 40; k += 9) { 3997 GemmMicrokernelTester() 3998 .mr(4) 3999 .nr(8) 4000 .kr(4) 4001 .sr(1) 4002 .m(4) 4003 .n(n) 4004 .k(k) 4005 .ks(3) 4006 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4007 } 4008 } 4009 } 4010 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,strided_cm_subtile)4011 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) { 4012 TEST_REQUIRES_ARM_NEON_DOT; 4013 for (size_t k = 1; k <= 40; k += 9) { 4014 for (uint32_t n = 1; n <= 8; n++) { 4015 for (uint32_t m = 1; m <= 4; m++) { 4016 GemmMicrokernelTester() 4017 .mr(4) 4018 .nr(8) 4019 .kr(4) 4020 .sr(1) 4021 .m(m) 4022 .n(n) 4023 .k(k) 4024 .cm_stride(11) 4025 .iterations(1) 4026 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4027 } 4028 } 4029 } 4030 } 4031 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,a_offset)4032 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) { 4033 TEST_REQUIRES_ARM_NEON_DOT; 4034 for (size_t k = 1; k <= 40; k += 9) { 4035 GemmMicrokernelTester() 4036 .mr(4) 4037 .nr(8) 4038 .kr(4) 4039 .sr(1) 4040 .m(4) 4041 .n(8) 4042 .k(k) 4043 .ks(3) 4044 .a_offset(163) 4045 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4046 } 4047 } 4048 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,zero)4049 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, zero) { 
4050 TEST_REQUIRES_ARM_NEON_DOT; 4051 for (size_t k = 1; k <= 40; k += 9) { 4052 for (uint32_t mz = 0; mz < 4; mz++) { 4053 GemmMicrokernelTester() 4054 .mr(4) 4055 .nr(8) 4056 .kr(4) 4057 .sr(1) 4058 .m(4) 4059 .n(8) 4060 .k(k) 4061 .ks(3) 4062 .a_offset(163) 4063 .zero_index(mz) 4064 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4065 } 4066 } 4067 } 4068 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,qmin)4069 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, qmin) { 4070 TEST_REQUIRES_ARM_NEON_DOT; 4071 GemmMicrokernelTester() 4072 .mr(4) 4073 .nr(8) 4074 .kr(4) 4075 .sr(1) 4076 .m(4) 4077 .n(8) 4078 .k(8) 4079 .qmin(128) 4080 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4081 } 4082 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,qmax)4083 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, qmax) { 4084 TEST_REQUIRES_ARM_NEON_DOT; 4085 GemmMicrokernelTester() 4086 .mr(4) 4087 .nr(8) 4088 .kr(4) 4089 .sr(1) 4090 .m(4) 4091 .n(8) 4092 .k(8) 4093 .qmax(128) 4094 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4095 } 4096 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,strided_cm)4097 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) { 4098 TEST_REQUIRES_ARM_NEON_DOT; 4099 GemmMicrokernelTester() 4100 .mr(4) 4101 .nr(8) 4102 .kr(4) 4103 .sr(1) 4104 .m(4) 4105 .n(8) 4106 .k(8) 4107 .cm_stride(11) 4108 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4109 } 4110 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,no_a_zero_point)4111 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_a_zero_point) { 4112 TEST_REQUIRES_ARM_NEON_DOT; 4113 for (size_t k = 1; k <= 40; k += 9) { 4114 GemmMicrokernelTester() 4115 .mr(4) 4116 .nr(8) 4117 .kr(4) 4118 .sr(1) 4119 .m(4) 4120 .n(8) 4121 .k(k) 4122 .a_zero_point(0) 4123 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4124 } 4125 } 4126 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,no_b_zero_point)4127 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_b_zero_point) { 4128 TEST_REQUIRES_ARM_NEON_DOT; 4129 for (size_t k = 1; k <= 40; k += 9) { 4130 GemmMicrokernelTester() 4131 .mr(4) 4132 .nr(8) 4133 .kr(4) 4134 .sr(1) 4135 .m(4) 4136 .n(8) 4137 .k(k) 4138 .b_zero_point(0) 4139 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4140 } 4141 } 4142 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55,no_zero_point)4143 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_zero_point) { 4144 TEST_REQUIRES_ARM_NEON_DOT; 4145 for (size_t k = 1; k <= 40; k += 9) { 4146 GemmMicrokernelTester() 4147 .mr(4) 4148 .nr(8) 4149 .kr(4) 4150 .sr(1) 4151 .m(4) 4152 .n(8) 4153 .k(k) 4154 .a_zero_point(0) 4155 .b_zero_point(0) 4156 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4157 } 4158 } 4159 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 4160 4161 4162 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,k_eq_16)4163 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16) { 4164 TEST_REQUIRES_ARM_NEON_DOT; 4165 GemmMicrokernelTester() 4166 .mr(4) 4167 .nr(8) 4168 .kr(4) 4169 .sr(1) 4170 .m(4) 4171 .n(8) 4172 .k(16) 4173 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4174 } 4175 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,strided_cn)4176 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, strided_cn) { 4177 TEST_REQUIRES_ARM_NEON_DOT; 4178 GemmMicrokernelTester() 4179 .mr(4) 4180 .nr(8) 4181 .kr(4) 4182 .sr(1) 4183 .m(4) 4184 .n(8) 4185 .k(16) 4186 .cn_stride(11) 4187 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4188 } 4189 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile)4190 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) { 4191 TEST_REQUIRES_ARM_NEON_DOT; 4192 for (uint32_t n = 1; n <= 8; n++) { 4193 for (uint32_t m = 1; m <= 4; m++) { 4194 GemmMicrokernelTester() 4195 .mr(4) 4196 .nr(8) 4197 .kr(4) 4198 .sr(1) 4199 .m(m) 4200 .n(n) 4201 .k(16) 4202 .iterations(1) 4203 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4204 } 4205 } 4206 } 4207 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile_m)4208 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) { 4209 TEST_REQUIRES_ARM_NEON_DOT; 4210 for (uint32_t m = 1; m <= 4; m++) { 4211 GemmMicrokernelTester() 4212 .mr(4) 4213 .nr(8) 4214 .kr(4) 4215 .sr(1) 4216 .m(m) 4217 .n(8) 4218 .k(16) 4219 .iterations(1) 4220 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4221 } 4222 } 4223 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile_n)4224 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) { 4225 TEST_REQUIRES_ARM_NEON_DOT; 4226 for (uint32_t n = 1; n <= 8; n++) { 4227 GemmMicrokernelTester() 4228 .mr(4) 
4229 .nr(8) 4230 .kr(4) 4231 .sr(1) 4232 .m(4) 4233 .n(n) 4234 .k(16) 4235 .iterations(1) 4236 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4237 } 4238 } 4239 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,k_lt_16)4240 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_lt_16) { 4241 TEST_REQUIRES_ARM_NEON_DOT; 4242 for (size_t k = 1; k < 16; k++) { 4243 GemmMicrokernelTester() 4244 .mr(4) 4245 .nr(8) 4246 .kr(4) 4247 .sr(1) 4248 .m(4) 4249 .n(8) 4250 .k(k) 4251 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4252 } 4253 } 4254 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,k_lt_16_subtile)4255 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) { 4256 TEST_REQUIRES_ARM_NEON_DOT; 4257 for (size_t k = 1; k < 16; k++) { 4258 for (uint32_t n = 1; n <= 8; n++) { 4259 for (uint32_t m = 1; m <= 4; m++) { 4260 GemmMicrokernelTester() 4261 .mr(4) 4262 .nr(8) 4263 .kr(4) 4264 .sr(1) 4265 .m(m) 4266 .n(n) 4267 .k(k) 4268 .iterations(1) 4269 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4270 } 4271 } 4272 } 4273 } 4274 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,k_gt_16)4275 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_gt_16) { 4276 TEST_REQUIRES_ARM_NEON_DOT; 4277 for (size_t k = 17; k < 32; k++) { 4278 GemmMicrokernelTester() 4279 .mr(4) 4280 .nr(8) 4281 .kr(4) 4282 .sr(1) 4283 .m(4) 4284 .n(8) 4285 .k(k) 4286 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4287 } 4288 } 4289 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,k_gt_16_subtile)4290 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, 
k_gt_16_subtile) { 4291 TEST_REQUIRES_ARM_NEON_DOT; 4292 for (size_t k = 17; k < 32; k++) { 4293 for (uint32_t n = 1; n <= 8; n++) { 4294 for (uint32_t m = 1; m <= 4; m++) { 4295 GemmMicrokernelTester() 4296 .mr(4) 4297 .nr(8) 4298 .kr(4) 4299 .sr(1) 4300 .m(m) 4301 .n(n) 4302 .k(k) 4303 .iterations(1) 4304 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4305 } 4306 } 4307 } 4308 } 4309 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,k_div_16)4310 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_div_16) { 4311 TEST_REQUIRES_ARM_NEON_DOT; 4312 for (size_t k = 32; k <= 160; k += 16) { 4313 GemmMicrokernelTester() 4314 .mr(4) 4315 .nr(8) 4316 .kr(4) 4317 .sr(1) 4318 .m(4) 4319 .n(8) 4320 .k(k) 4321 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4322 } 4323 } 4324 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,k_div_16_subtile)4325 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) { 4326 TEST_REQUIRES_ARM_NEON_DOT; 4327 for (size_t k = 32; k <= 160; k += 16) { 4328 for (uint32_t n = 1; n <= 8; n++) { 4329 for (uint32_t m = 1; m <= 4; m++) { 4330 GemmMicrokernelTester() 4331 .mr(4) 4332 .nr(8) 4333 .kr(4) 4334 .sr(1) 4335 .m(m) 4336 .n(n) 4337 .k(k) 4338 .iterations(1) 4339 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4340 } 4341 } 4342 } 4343 } 4344 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,n_gt_8)4345 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8) { 4346 TEST_REQUIRES_ARM_NEON_DOT; 4347 for (uint32_t n = 9; n < 16; n++) { 4348 for (size_t k = 1; k <= 80; k += 17) { 4349 GemmMicrokernelTester() 4350 .mr(4) 4351 .nr(8) 4352 .kr(4) 4353 .sr(1) 4354 .m(4) 4355 .n(n) 4356 .k(k) 4357 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4358 } 4359 } 4360 } 4361 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,n_gt_8_strided_cn)4362 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8_strided_cn) { 4363 TEST_REQUIRES_ARM_NEON_DOT; 4364 for (uint32_t n = 9; n < 16; n++) { 4365 for (size_t k = 1; k <= 80; k += 17) { 4366 GemmMicrokernelTester() 4367 .mr(4) 4368 .nr(8) 4369 .kr(4) 4370 .sr(1) 4371 .m(4) 4372 .n(n) 4373 .k(k) 4374 .cn_stride(11) 4375 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4376 } 4377 } 4378 } 4379 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,n_gt_8_subtile)4380 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8_subtile) { 4381 TEST_REQUIRES_ARM_NEON_DOT; 4382 for (uint32_t n = 9; n < 16; n++) { 4383 for (size_t k = 1; k <= 80; k += 17) { 4384 for (uint32_t m = 1; m <= 4; m++) { 4385 GemmMicrokernelTester() 4386 .mr(4) 4387 .nr(8) 4388 .kr(4) 4389 .sr(1) 4390 .m(m) 4391 .n(n) 4392 .k(k) 4393 .iterations(1) 4394 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4395 } 4396 } 4397 } 4398 } 4399 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,n_div_8)4400 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8) { 4401 TEST_REQUIRES_ARM_NEON_DOT; 4402 for (uint32_t n = 16; n <= 24; n += 8) { 4403 for (size_t k = 1; k <= 80; k += 17) { 4404 GemmMicrokernelTester() 4405 .mr(4) 4406 .nr(8) 4407 .kr(4) 4408 .sr(1) 4409 .m(4) 4410 .n(n) 4411 .k(k) 4412 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4413 } 4414 } 4415 } 4416 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,n_div_8_strided_cn)4417 
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8_strided_cn) {
  // n in {16, 24} x k in {1, 18, 35, 52, 69} with m = 4 and cn_stride(11).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in {16, 24} x k in {1, 18, 35, 52, 69} x m in [1, 4], each with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with k in {1, 18, 35, 52, 69}, m = 4, n = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks(3) with k in {1, 18, 35, 52, 69} x n in [1, 8] x m in [1, 4], each with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
GemmMicrokernelTester() 4477 .mr(4) 4478 .nr(8) 4479 .kr(4) 4480 .sr(1) 4481 .m(m) 4482 .n(n) 4483 .k(k) 4484 .ks(3) 4485 .iterations(1) 4486 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4487 } 4488 } 4489 } 4490 } 4491 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,n_gt_8_small_kernel)4492 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8_small_kernel) { 4493 TEST_REQUIRES_ARM_NEON_DOT; 4494 for (uint32_t n = 9; n < 16; n++) { 4495 for (size_t k = 1; k <= 80; k += 17) { 4496 GemmMicrokernelTester() 4497 .mr(4) 4498 .nr(8) 4499 .kr(4) 4500 .sr(1) 4501 .m(4) 4502 .n(n) 4503 .k(k) 4504 .ks(3) 4505 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4506 } 4507 } 4508 } 4509 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,n_div_8_small_kernel)4510 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8_small_kernel) { 4511 TEST_REQUIRES_ARM_NEON_DOT; 4512 for (uint32_t n = 16; n <= 24; n += 8) { 4513 for (size_t k = 1; k <= 80; k += 17) { 4514 GemmMicrokernelTester() 4515 .mr(4) 4516 .nr(8) 4517 .kr(4) 4518 .sr(1) 4519 .m(4) 4520 .n(n) 4521 .k(k) 4522 .ks(3) 4523 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4524 } 4525 } 4526 } 4527 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,strided_cm_subtile)4528 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) { 4529 TEST_REQUIRES_ARM_NEON_DOT; 4530 for (size_t k = 1; k <= 80; k += 17) { 4531 for (uint32_t n = 1; n <= 8; n++) { 4532 for (uint32_t m = 1; m <= 4; m++) { 4533 GemmMicrokernelTester() 4534 .mr(4) 4535 .nr(8) 4536 .kr(4) 4537 .sr(1) 4538 .m(m) 4539 .n(n) 4540 .k(k) 4541 .cm_stride(11) 4542 .iterations(1) 4543 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4544 } 4545 } 4546 } 4547 } 4548 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,a_offset)4549 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, a_offset) { 4550 TEST_REQUIRES_ARM_NEON_DOT; 4551 for (size_t k = 1; k <= 80; k += 17) { 4552 GemmMicrokernelTester() 4553 .mr(4) 4554 .nr(8) 4555 .kr(4) 4556 .sr(1) 4557 .m(4) 4558 .n(8) 4559 .k(k) 4560 .ks(3) 4561 .a_offset(331) 4562 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4563 } 4564 } 4565 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,zero)4566 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, zero) { 4567 TEST_REQUIRES_ARM_NEON_DOT; 4568 for (size_t k = 1; k <= 80; k += 17) { 4569 for (uint32_t mz = 0; mz < 4; mz++) { 4570 GemmMicrokernelTester() 4571 .mr(4) 4572 .nr(8) 4573 .kr(4) 4574 .sr(1) 4575 .m(4) 4576 .n(8) 4577 .k(k) 4578 .ks(3) 4579 .a_offset(331) 4580 .zero_index(mz) 4581 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4582 } 4583 } 4584 } 4585 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,qmin)4586 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, qmin) { 4587 TEST_REQUIRES_ARM_NEON_DOT; 4588 GemmMicrokernelTester() 4589 .mr(4) 4590 .nr(8) 4591 .kr(4) 4592 .sr(1) 4593 .m(4) 4594 .n(8) 4595 .k(16) 4596 .qmin(128) 4597 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4598 } 4599 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,qmax)4600 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, qmax) { 4601 TEST_REQUIRES_ARM_NEON_DOT; 4602 GemmMicrokernelTester() 4603 .mr(4) 4604 .nr(8) 4605 .kr(4) 4606 .sr(1) 4607 .m(4) 
4608 .n(8) 4609 .k(16) 4610 .qmax(128) 4611 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4612 } 4613 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,strided_cm)4614 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, strided_cm) { 4615 TEST_REQUIRES_ARM_NEON_DOT; 4616 GemmMicrokernelTester() 4617 .mr(4) 4618 .nr(8) 4619 .kr(4) 4620 .sr(1) 4621 .m(4) 4622 .n(8) 4623 .k(16) 4624 .cm_stride(11) 4625 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4626 } 4627 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,no_a_zero_point)4628 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, no_a_zero_point) { 4629 TEST_REQUIRES_ARM_NEON_DOT; 4630 for (size_t k = 1; k <= 80; k += 17) { 4631 GemmMicrokernelTester() 4632 .mr(4) 4633 .nr(8) 4634 .kr(4) 4635 .sr(1) 4636 .m(4) 4637 .n(8) 4638 .k(k) 4639 .a_zero_point(0) 4640 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4641 } 4642 } 4643 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,no_b_zero_point)4644 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, no_b_zero_point) { 4645 TEST_REQUIRES_ARM_NEON_DOT; 4646 for (size_t k = 1; k <= 80; k += 17) { 4647 GemmMicrokernelTester() 4648 .mr(4) 4649 .nr(8) 4650 .kr(4) 4651 .sr(1) 4652 .m(4) 4653 .n(8) 4654 .k(k) 4655 .b_zero_point(0) 4656 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4657 } 4658 } 4659 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128,no_zero_point)4660 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, no_zero_point) { 4661 TEST_REQUIRES_ARM_NEON_DOT; 4662 for (size_t k = 1; k <= 80; k += 17) { 4663 GemmMicrokernelTester() 4664 .mr(4) 4665 
.nr(8) 4666 .kr(4) 4667 .sr(1) 4668 .m(4) 4669 .n(8) 4670 .k(k) 4671 .a_zero_point(0) 4672 .b_zero_point(0) 4673 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4674 } 4675 } 4676 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 4677 4678 4679 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,k_eq_8)4680 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8) { 4681 TEST_REQUIRES_ARM_NEON_DOT; 4682 GemmMicrokernelTester() 4683 .mr(2) 4684 .nr(8) 4685 .kr(4) 4686 .sr(1) 4687 .m(2) 4688 .n(8) 4689 .k(8) 4690 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4691 } 4692 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,strided_cn)4693 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, strided_cn) { 4694 TEST_REQUIRES_ARM_NEON_DOT; 4695 GemmMicrokernelTester() 4696 .mr(2) 4697 .nr(8) 4698 .kr(4) 4699 .sr(1) 4700 .m(2) 4701 .n(8) 4702 .k(8) 4703 .cn_stride(11) 4704 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4705 } 4706 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,k_eq_8_subtile)4707 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8_subtile) { 4708 TEST_REQUIRES_ARM_NEON_DOT; 4709 for (uint32_t n = 1; n <= 8; n++) { 4710 for (uint32_t m = 1; m <= 2; m++) { 4711 GemmMicrokernelTester() 4712 .mr(2) 4713 .nr(8) 4714 .kr(4) 4715 .sr(1) 4716 .m(m) 4717 .n(n) 4718 .k(8) 4719 .iterations(1) 4720 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4721 } 4722 } 4723 } 4724 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,k_eq_8_subtile_m)4725 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8_subtile_m) { 4726 TEST_REQUIRES_ARM_NEON_DOT; 4727 for (uint32_t m = 1; m <= 2; m++) { 4728 GemmMicrokernelTester() 4729 .mr(2) 4730 .nr(8) 
4731 .kr(4) 4732 .sr(1) 4733 .m(m) 4734 .n(8) 4735 .k(8) 4736 .iterations(1) 4737 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4738 } 4739 } 4740 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,k_eq_8_subtile_n)4741 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8_subtile_n) { 4742 TEST_REQUIRES_ARM_NEON_DOT; 4743 for (uint32_t n = 1; n <= 8; n++) { 4744 GemmMicrokernelTester() 4745 .mr(2) 4746 .nr(8) 4747 .kr(4) 4748 .sr(1) 4749 .m(2) 4750 .n(n) 4751 .k(8) 4752 .iterations(1) 4753 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4754 } 4755 } 4756 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,k_lt_8)4757 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_lt_8) { 4758 TEST_REQUIRES_ARM_NEON_DOT; 4759 for (size_t k = 1; k < 8; k++) { 4760 GemmMicrokernelTester() 4761 .mr(2) 4762 .nr(8) 4763 .kr(4) 4764 .sr(1) 4765 .m(2) 4766 .n(8) 4767 .k(k) 4768 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4769 } 4770 } 4771 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,k_lt_8_subtile)4772 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_lt_8_subtile) { 4773 TEST_REQUIRES_ARM_NEON_DOT; 4774 for (size_t k = 1; k < 8; k++) { 4775 for (uint32_t n = 1; n <= 8; n++) { 4776 for (uint32_t m = 1; m <= 2; m++) { 4777 GemmMicrokernelTester() 4778 .mr(2) 4779 .nr(8) 4780 .kr(4) 4781 .sr(1) 4782 .m(m) 4783 .n(n) 4784 .k(k) 4785 .iterations(1) 4786 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4787 } 4788 } 4789 } 4790 } 4791 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,k_gt_8)4792 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_gt_8) { 4793 TEST_REQUIRES_ARM_NEON_DOT; 4794 for (size_t k = 9; k < 16; k++) { 4795 GemmMicrokernelTester() 4796 .mr(2) 4797 .nr(8) 4798 .kr(4) 4799 .sr(1) 4800 .m(2) 
4801 .n(8) 4802 .k(k) 4803 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4804 } 4805 } 4806 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,k_gt_8_subtile)4807 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_gt_8_subtile) { 4808 TEST_REQUIRES_ARM_NEON_DOT; 4809 for (size_t k = 9; k < 16; k++) { 4810 for (uint32_t n = 1; n <= 8; n++) { 4811 for (uint32_t m = 1; m <= 2; m++) { 4812 GemmMicrokernelTester() 4813 .mr(2) 4814 .nr(8) 4815 .kr(4) 4816 .sr(1) 4817 .m(m) 4818 .n(n) 4819 .k(k) 4820 .iterations(1) 4821 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4822 } 4823 } 4824 } 4825 } 4826 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,k_div_8)4827 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_div_8) { 4828 TEST_REQUIRES_ARM_NEON_DOT; 4829 for (size_t k = 16; k <= 80; k += 8) { 4830 GemmMicrokernelTester() 4831 .mr(2) 4832 .nr(8) 4833 .kr(4) 4834 .sr(1) 4835 .m(2) 4836 .n(8) 4837 .k(k) 4838 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4839 } 4840 } 4841 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,k_div_8_subtile)4842 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_div_8_subtile) { 4843 TEST_REQUIRES_ARM_NEON_DOT; 4844 for (size_t k = 16; k <= 80; k += 8) { 4845 for (uint32_t n = 1; n <= 8; n++) { 4846 for (uint32_t m = 1; m <= 2; m++) { 4847 GemmMicrokernelTester() 4848 .mr(2) 4849 .nr(8) 4850 .kr(4) 4851 .sr(1) 4852 .m(m) 4853 .n(n) 4854 .k(k) 4855 .iterations(1) 4856 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4857 } 4858 } 4859 } 4860 } 4861 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,n_gt_8)4862 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8) { 4863 TEST_REQUIRES_ARM_NEON_DOT; 4864 for (uint32_t n = 9; n < 16; n++) { 4865 for (size_t k = 1; k <= 40; k 
+= 9) { 4866 GemmMicrokernelTester() 4867 .mr(2) 4868 .nr(8) 4869 .kr(4) 4870 .sr(1) 4871 .m(2) 4872 .n(n) 4873 .k(k) 4874 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4875 } 4876 } 4877 } 4878 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,n_gt_8_strided_cn)4879 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8_strided_cn) { 4880 TEST_REQUIRES_ARM_NEON_DOT; 4881 for (uint32_t n = 9; n < 16; n++) { 4882 for (size_t k = 1; k <= 40; k += 9) { 4883 GemmMicrokernelTester() 4884 .mr(2) 4885 .nr(8) 4886 .kr(4) 4887 .sr(1) 4888 .m(2) 4889 .n(n) 4890 .k(k) 4891 .cn_stride(11) 4892 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4893 } 4894 } 4895 } 4896 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,n_gt_8_subtile)4897 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8_subtile) { 4898 TEST_REQUIRES_ARM_NEON_DOT; 4899 for (uint32_t n = 9; n < 16; n++) { 4900 for (size_t k = 1; k <= 40; k += 9) { 4901 for (uint32_t m = 1; m <= 2; m++) { 4902 GemmMicrokernelTester() 4903 .mr(2) 4904 .nr(8) 4905 .kr(4) 4906 .sr(1) 4907 .m(m) 4908 .n(n) 4909 .k(k) 4910 .iterations(1) 4911 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4912 } 4913 } 4914 } 4915 } 4916 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,n_div_8)4917 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8) { 4918 TEST_REQUIRES_ARM_NEON_DOT; 4919 for (uint32_t n = 16; n <= 24; n += 8) { 4920 for (size_t k = 1; k <= 40; k += 9) { 4921 GemmMicrokernelTester() 4922 .mr(2) 4923 .nr(8) 4924 .kr(4) 4925 .sr(1) 4926 .m(2) 4927 .n(n) 4928 .k(k) 4929 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 4930 } 4931 } 4932 } 4933 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,n_div_8_strided_cn)4934 
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8_strided_cn) {
  // n in {16, 24} x k in {1, 10, 19, 28, 37} with m = 2 and cn_stride(11).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in {16, 24} x k in {1, 10, 19, 28, 37} x m in [1, 2], each with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with k in {1, 10, 19, 28, 37}, m = 2, n = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks(3) with k in {1, 10, 19, 28, 37} x n in [1, 8] x m in [1, 2], each with iterations(1).
TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5004 } 5005 } 5006 } 5007 } 5008 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,n_gt_8_small_kernel)5009 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8_small_kernel) { 5010 TEST_REQUIRES_ARM_NEON_DOT; 5011 for (uint32_t n = 9; n < 16; n++) { 5012 for (size_t k = 1; k <= 40; k += 9) { 5013 GemmMicrokernelTester() 5014 .mr(2) 5015 .nr(8) 5016 .kr(4) 5017 .sr(1) 5018 .m(2) 5019 .n(n) 5020 .k(k) 5021 .ks(3) 5022 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5023 } 5024 } 5025 } 5026 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,n_div_8_small_kernel)5027 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8_small_kernel) { 5028 TEST_REQUIRES_ARM_NEON_DOT; 5029 for (uint32_t n = 16; n <= 24; n += 8) { 5030 for (size_t k = 1; k <= 40; k += 9) { 5031 GemmMicrokernelTester() 5032 .mr(2) 5033 .nr(8) 5034 .kr(4) 5035 .sr(1) 5036 .m(2) 5037 .n(n) 5038 .k(k) 5039 .ks(3) 5040 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5041 } 5042 } 5043 } 5044 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,strided_cm_subtile)5045 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, strided_cm_subtile) { 5046 TEST_REQUIRES_ARM_NEON_DOT; 5047 for (size_t k = 1; k <= 40; k += 9) { 5048 for (uint32_t n = 1; n <= 8; n++) { 5049 for (uint32_t m = 1; m <= 2; m++) { 5050 GemmMicrokernelTester() 5051 .mr(2) 5052 .nr(8) 5053 .kr(4) 5054 .sr(1) 5055 .m(m) 5056 .n(n) 5057 .k(k) 5058 .cm_stride(11) 5059 .iterations(1) 5060 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5061 } 5062 } 5063 } 5064 } 5065 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,a_offset)5066 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, a_offset) { 5067 TEST_REQUIRES_ARM_NEON_DOT; 5068 for 
(size_t k = 1; k <= 40; k += 9) { 5069 GemmMicrokernelTester() 5070 .mr(2) 5071 .nr(8) 5072 .kr(4) 5073 .sr(1) 5074 .m(2) 5075 .n(8) 5076 .k(k) 5077 .ks(3) 5078 .a_offset(83) 5079 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5080 } 5081 } 5082 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,zero)5083 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, zero) { 5084 TEST_REQUIRES_ARM_NEON_DOT; 5085 for (size_t k = 1; k <= 40; k += 9) { 5086 for (uint32_t mz = 0; mz < 2; mz++) { 5087 GemmMicrokernelTester() 5088 .mr(2) 5089 .nr(8) 5090 .kr(4) 5091 .sr(1) 5092 .m(2) 5093 .n(8) 5094 .k(k) 5095 .ks(3) 5096 .a_offset(83) 5097 .zero_index(mz) 5098 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5099 } 5100 } 5101 } 5102 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,qmin)5103 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, qmin) { 5104 TEST_REQUIRES_ARM_NEON_DOT; 5105 GemmMicrokernelTester() 5106 .mr(2) 5107 .nr(8) 5108 .kr(4) 5109 .sr(1) 5110 .m(2) 5111 .n(8) 5112 .k(8) 5113 .qmin(128) 5114 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5115 } 5116 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,qmax)5117 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, qmax) { 5118 TEST_REQUIRES_ARM_NEON_DOT; 5119 GemmMicrokernelTester() 5120 .mr(2) 5121 .nr(8) 5122 .kr(4) 5123 .sr(1) 5124 .m(2) 5125 .n(8) 5126 .k(8) 5127 .qmax(128) 5128 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5129 } 5130 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,strided_cm)5131 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, strided_cm) { 5132 TEST_REQUIRES_ARM_NEON_DOT; 5133 GemmMicrokernelTester() 5134 .mr(2) 5135 .nr(8) 5136 .kr(4) 5137 .sr(1) 5138 .m(2) 5139 .n(8) 5140 .k(8) 5141 .cm_stride(11) 5142 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5143 } 5144 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,no_a_zero_point)5145 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, no_a_zero_point) { 5146 TEST_REQUIRES_ARM_NEON_DOT; 5147 for (size_t k = 1; k <= 40; k += 9) { 5148 GemmMicrokernelTester() 5149 .mr(2) 5150 .nr(8) 5151 .kr(4) 5152 .sr(1) 5153 .m(2) 5154 .n(8) 5155 .k(k) 5156 .a_zero_point(0) 5157 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5158 } 5159 } 5160 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,no_b_zero_point)5161 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, no_b_zero_point) { 5162 TEST_REQUIRES_ARM_NEON_DOT; 5163 for (size_t k = 1; k <= 40; k += 9) { 5164 GemmMicrokernelTester() 5165 .mr(2) 5166 .nr(8) 5167 .kr(4) 5168 .sr(1) 5169 .m(2) 5170 .n(8) 5171 .k(k) 5172 .b_zero_point(0) 5173 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5174 } 5175 } 5176 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT,no_zero_point)5177 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, no_zero_point) { 5178 TEST_REQUIRES_ARM_NEON_DOT; 5179 for (size_t k = 1; k <= 40; k += 9) { 5180 GemmMicrokernelTester() 5181 .mr(2) 5182 .nr(8) 5183 .kr(4) 5184 .sr(1) 5185 .m(2) 5186 .n(8) 5187 .k(k) 5188 .a_zero_point(0) 5189 .b_zero_point(0) 5190 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5191 } 5192 } 5193 #endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 5194 5195 5196 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,k_eq_8)5197 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8) { 5198 TEST_REQUIRES_ARM_NEON_DOT; 5199 GemmMicrokernelTester() 5200 .mr(3) 5201 .nr(8) 5202 .kr(4) 5203 .sr(1) 5204 .m(3) 5205 .n(8) 5206 
.k(8) 5207 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5208 } 5209 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,strided_cn)5210 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, strided_cn) { 5211 TEST_REQUIRES_ARM_NEON_DOT; 5212 GemmMicrokernelTester() 5213 .mr(3) 5214 .nr(8) 5215 .kr(4) 5216 .sr(1) 5217 .m(3) 5218 .n(8) 5219 .k(8) 5220 .cn_stride(11) 5221 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5222 } 5223 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,k_eq_8_subtile)5224 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8_subtile) { 5225 TEST_REQUIRES_ARM_NEON_DOT; 5226 for (uint32_t n = 1; n <= 8; n++) { 5227 for (uint32_t m = 1; m <= 3; m++) { 5228 GemmMicrokernelTester() 5229 .mr(3) 5230 .nr(8) 5231 .kr(4) 5232 .sr(1) 5233 .m(m) 5234 .n(n) 5235 .k(8) 5236 .iterations(1) 5237 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5238 } 5239 } 5240 } 5241 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,k_eq_8_subtile_m)5242 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8_subtile_m) { 5243 TEST_REQUIRES_ARM_NEON_DOT; 5244 for (uint32_t m = 1; m <= 3; m++) { 5245 GemmMicrokernelTester() 5246 .mr(3) 5247 .nr(8) 5248 .kr(4) 5249 .sr(1) 5250 .m(m) 5251 .n(8) 5252 .k(8) 5253 .iterations(1) 5254 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5255 } 5256 } 5257 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,k_eq_8_subtile_n)5258 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8_subtile_n) { 5259 TEST_REQUIRES_ARM_NEON_DOT; 5260 for (uint32_t n = 1; n <= 8; n++) { 5261 GemmMicrokernelTester() 5262 .mr(3) 5263 .nr(8) 5264 .kr(4) 5265 .sr(1) 5266 .m(3) 5267 .n(n) 5268 .k(8) 5269 .iterations(1) 5270 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5271 } 5272 } 5273 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,k_lt_8)5274 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_lt_8) { 5275 TEST_REQUIRES_ARM_NEON_DOT; 5276 for (size_t k = 1; k < 8; k++) { 5277 GemmMicrokernelTester() 5278 .mr(3) 5279 .nr(8) 5280 .kr(4) 5281 .sr(1) 5282 .m(3) 5283 .n(8) 5284 .k(k) 5285 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5286 } 5287 } 5288 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,k_lt_8_subtile)5289 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_lt_8_subtile) { 5290 TEST_REQUIRES_ARM_NEON_DOT; 5291 for (size_t k = 1; k < 8; k++) { 5292 for (uint32_t n = 1; n <= 8; n++) { 5293 for (uint32_t m = 1; m <= 3; m++) { 5294 GemmMicrokernelTester() 5295 .mr(3) 5296 .nr(8) 5297 .kr(4) 5298 .sr(1) 5299 .m(m) 5300 .n(n) 5301 .k(k) 5302 .iterations(1) 5303 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5304 } 5305 } 5306 } 5307 } 5308 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,k_gt_8)5309 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_gt_8) { 5310 TEST_REQUIRES_ARM_NEON_DOT; 5311 for (size_t k = 9; k < 16; k++) { 5312 GemmMicrokernelTester() 5313 .mr(3) 5314 .nr(8) 5315 .kr(4) 5316 .sr(1) 5317 .m(3) 5318 .n(8) 5319 .k(k) 5320 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5321 } 5322 } 5323 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,k_gt_8_subtile)5324 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_gt_8_subtile) { 5325 TEST_REQUIRES_ARM_NEON_DOT; 5326 for (size_t k = 9; k < 16; k++) { 5327 for (uint32_t n = 1; n <= 8; n++) { 5328 for (uint32_t m = 1; m <= 3; m++) { 5329 GemmMicrokernelTester() 5330 .mr(3) 5331 .nr(8) 5332 .kr(4) 5333 .sr(1) 5334 .m(m) 5335 .n(n) 5336 .k(k) 5337 .iterations(1) 5338 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5339 } 5340 } 5341 } 5342 } 5343 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,k_div_8)5344 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_div_8) { 5345 TEST_REQUIRES_ARM_NEON_DOT; 5346 for (size_t k = 16; k <= 80; k += 8) { 5347 GemmMicrokernelTester() 5348 .mr(3) 5349 .nr(8) 5350 .kr(4) 5351 .sr(1) 5352 .m(3) 5353 .n(8) 5354 .k(k) 5355 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5356 } 5357 } 5358 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,k_div_8_subtile)5359 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_div_8_subtile) { 5360 TEST_REQUIRES_ARM_NEON_DOT; 5361 for (size_t k = 16; k <= 80; k += 8) { 5362 for (uint32_t n = 1; n <= 8; n++) { 5363 for (uint32_t m = 1; m <= 3; m++) { 5364 GemmMicrokernelTester() 5365 .mr(3) 5366 .nr(8) 5367 .kr(4) 5368 .sr(1) 5369 .m(m) 5370 .n(n) 5371 .k(k) 5372 .iterations(1) 5373 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5374 } 5375 } 5376 } 5377 } 5378 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,n_gt_8)5379 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8) { 5380 TEST_REQUIRES_ARM_NEON_DOT; 5381 for (uint32_t n = 9; n < 16; n++) { 5382 for (size_t k = 1; k <= 40; k += 9) { 5383 GemmMicrokernelTester() 5384 .mr(3) 5385 .nr(8) 5386 .kr(4) 5387 .sr(1) 5388 .m(3) 5389 .n(n) 5390 .k(k) 5391 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5392 } 5393 } 5394 } 5395 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,n_gt_8_strided_cn)5396 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8_strided_cn) { 5397 TEST_REQUIRES_ARM_NEON_DOT; 5398 for (uint32_t n = 9; n < 16; n++) { 5399 for (size_t k = 1; k <= 40; k += 9) { 5400 GemmMicrokernelTester() 5401 .mr(3) 5402 .nr(8) 5403 
.kr(4) 5404 .sr(1) 5405 .m(3) 5406 .n(n) 5407 .k(k) 5408 .cn_stride(11) 5409 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5410 } 5411 } 5412 } 5413 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,n_gt_8_subtile)5414 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8_subtile) { 5415 TEST_REQUIRES_ARM_NEON_DOT; 5416 for (uint32_t n = 9; n < 16; n++) { 5417 for (size_t k = 1; k <= 40; k += 9) { 5418 for (uint32_t m = 1; m <= 3; m++) { 5419 GemmMicrokernelTester() 5420 .mr(3) 5421 .nr(8) 5422 .kr(4) 5423 .sr(1) 5424 .m(m) 5425 .n(n) 5426 .k(k) 5427 .iterations(1) 5428 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5429 } 5430 } 5431 } 5432 } 5433 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,n_div_8)5434 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8) { 5435 TEST_REQUIRES_ARM_NEON_DOT; 5436 for (uint32_t n = 16; n <= 24; n += 8) { 5437 for (size_t k = 1; k <= 40; k += 9) { 5438 GemmMicrokernelTester() 5439 .mr(3) 5440 .nr(8) 5441 .kr(4) 5442 .sr(1) 5443 .m(3) 5444 .n(n) 5445 .k(k) 5446 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5447 } 5448 } 5449 } 5450 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,n_div_8_strided_cn)5451 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8_strided_cn) { 5452 TEST_REQUIRES_ARM_NEON_DOT; 5453 for (uint32_t n = 16; n <= 24; n += 8) { 5454 for (size_t k = 1; k <= 40; k += 9) { 5455 GemmMicrokernelTester() 5456 .mr(3) 5457 .nr(8) 5458 .kr(4) 5459 .sr(1) 5460 .m(3) 5461 .n(n) 5462 .k(k) 5463 .cn_stride(11) 5464 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5465 } 5466 } 5467 } 5468 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,n_div_8_subtile)5469 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8_subtile) { 5470 
TEST_REQUIRES_ARM_NEON_DOT; 5471 for (uint32_t n = 16; n <= 24; n += 8) { 5472 for (size_t k = 1; k <= 40; k += 9) { 5473 for (uint32_t m = 1; m <= 3; m++) { 5474 GemmMicrokernelTester() 5475 .mr(3) 5476 .nr(8) 5477 .kr(4) 5478 .sr(1) 5479 .m(m) 5480 .n(n) 5481 .k(k) 5482 .iterations(1) 5483 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5484 } 5485 } 5486 } 5487 } 5488 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,small_kernel)5489 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, small_kernel) { 5490 TEST_REQUIRES_ARM_NEON_DOT; 5491 for (size_t k = 1; k <= 40; k += 9) { 5492 GemmMicrokernelTester() 5493 .mr(3) 5494 .nr(8) 5495 .kr(4) 5496 .sr(1) 5497 .m(3) 5498 .n(8) 5499 .k(k) 5500 .ks(3) 5501 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5502 } 5503 } 5504 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,small_kernel_subtile)5505 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, small_kernel_subtile) { 5506 TEST_REQUIRES_ARM_NEON_DOT; 5507 for (size_t k = 1; k <= 40; k += 9) { 5508 for (uint32_t n = 1; n <= 8; n++) { 5509 for (uint32_t m = 1; m <= 3; m++) { 5510 GemmMicrokernelTester() 5511 .mr(3) 5512 .nr(8) 5513 .kr(4) 5514 .sr(1) 5515 .m(m) 5516 .n(n) 5517 .k(k) 5518 .ks(3) 5519 .iterations(1) 5520 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5521 } 5522 } 5523 } 5524 } 5525 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,n_gt_8_small_kernel)5526 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8_small_kernel) { 5527 TEST_REQUIRES_ARM_NEON_DOT; 5528 for (uint32_t n = 9; n < 16; n++) { 5529 for (size_t k = 1; k <= 40; k += 9) { 5530 GemmMicrokernelTester() 5531 .mr(3) 5532 .nr(8) 5533 .kr(4) 5534 .sr(1) 5535 .m(3) 5536 .n(n) 5537 .k(k) 5538 .ks(3) 5539 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5540 } 5541 } 5542 } 5543 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,n_div_8_small_kernel)5544 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8_small_kernel) { 5545 TEST_REQUIRES_ARM_NEON_DOT; 5546 for (uint32_t n = 16; n <= 24; n += 8) { 5547 for (size_t k = 1; k <= 40; k += 9) { 5548 GemmMicrokernelTester() 5549 .mr(3) 5550 .nr(8) 5551 .kr(4) 5552 .sr(1) 5553 .m(3) 5554 .n(n) 5555 .k(k) 5556 .ks(3) 5557 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5558 } 5559 } 5560 } 5561 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,strided_cm_subtile)5562 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, strided_cm_subtile) { 5563 TEST_REQUIRES_ARM_NEON_DOT; 5564 for (size_t k = 1; k <= 40; k += 9) { 5565 for (uint32_t n = 1; n <= 8; n++) { 5566 for (uint32_t m = 1; m <= 3; m++) { 5567 GemmMicrokernelTester() 5568 .mr(3) 5569 .nr(8) 5570 .kr(4) 5571 .sr(1) 5572 .m(m) 5573 .n(n) 5574 .k(k) 5575 .cm_stride(11) 5576 .iterations(1) 5577 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5578 } 5579 } 5580 } 5581 } 5582 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,a_offset)5583 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, a_offset) { 5584 TEST_REQUIRES_ARM_NEON_DOT; 5585 for (size_t k = 1; k <= 40; k += 9) { 5586 GemmMicrokernelTester() 5587 .mr(3) 5588 .nr(8) 5589 .kr(4) 5590 .sr(1) 5591 .m(3) 5592 .n(8) 5593 .k(k) 5594 .ks(3) 5595 .a_offset(127) 5596 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5597 } 5598 } 5599 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,zero)5600 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, zero) { 5601 TEST_REQUIRES_ARM_NEON_DOT; 5602 for (size_t k = 1; k <= 40; k += 9) { 5603 for (uint32_t mz = 0; mz < 3; mz++) { 5604 GemmMicrokernelTester() 5605 .mr(3) 5606 
.nr(8) 5607 .kr(4) 5608 .sr(1) 5609 .m(3) 5610 .n(8) 5611 .k(k) 5612 .ks(3) 5613 .a_offset(127) 5614 .zero_index(mz) 5615 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5616 } 5617 } 5618 } 5619 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,qmin)5620 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, qmin) { 5621 TEST_REQUIRES_ARM_NEON_DOT; 5622 GemmMicrokernelTester() 5623 .mr(3) 5624 .nr(8) 5625 .kr(4) 5626 .sr(1) 5627 .m(3) 5628 .n(8) 5629 .k(8) 5630 .qmin(128) 5631 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5632 } 5633 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,qmax)5634 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, qmax) { 5635 TEST_REQUIRES_ARM_NEON_DOT; 5636 GemmMicrokernelTester() 5637 .mr(3) 5638 .nr(8) 5639 .kr(4) 5640 .sr(1) 5641 .m(3) 5642 .n(8) 5643 .k(8) 5644 .qmax(128) 5645 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5646 } 5647 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,strided_cm)5648 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, strided_cm) { 5649 TEST_REQUIRES_ARM_NEON_DOT; 5650 GemmMicrokernelTester() 5651 .mr(3) 5652 .nr(8) 5653 .kr(4) 5654 .sr(1) 5655 .m(3) 5656 .n(8) 5657 .k(8) 5658 .cm_stride(11) 5659 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5660 } 5661 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,no_a_zero_point)5662 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, no_a_zero_point) { 5663 TEST_REQUIRES_ARM_NEON_DOT; 5664 for (size_t k = 1; k <= 40; k += 9) { 5665 GemmMicrokernelTester() 5666 .mr(3) 5667 .nr(8) 5668 .kr(4) 5669 .sr(1) 5670 .m(3) 5671 .n(8) 5672 .k(k) 5673 .a_zero_point(0) 5674 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5675 } 5676 
} 5677 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,no_b_zero_point)5678 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, no_b_zero_point) { 5679 TEST_REQUIRES_ARM_NEON_DOT; 5680 for (size_t k = 1; k <= 40; k += 9) { 5681 GemmMicrokernelTester() 5682 .mr(3) 5683 .nr(8) 5684 .kr(4) 5685 .sr(1) 5686 .m(3) 5687 .n(8) 5688 .k(k) 5689 .b_zero_point(0) 5690 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5691 } 5692 } 5693 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT,no_zero_point)5694 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, no_zero_point) { 5695 TEST_REQUIRES_ARM_NEON_DOT; 5696 for (size_t k = 1; k <= 40; k += 9) { 5697 GemmMicrokernelTester() 5698 .mr(3) 5699 .nr(8) 5700 .kr(4) 5701 .sr(1) 5702 .m(3) 5703 .n(8) 5704 .k(k) 5705 .a_zero_point(0) 5706 .b_zero_point(0) 5707 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5708 } 5709 } 5710 #endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 5711 5712 5713 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,k_eq_8)5714 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8) { 5715 TEST_REQUIRES_ARM_NEON_DOT; 5716 GemmMicrokernelTester() 5717 .mr(4) 5718 .nr(8) 5719 .kr(4) 5720 .sr(1) 5721 .m(4) 5722 .n(8) 5723 .k(8) 5724 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5725 } 5726 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,strided_cn)5727 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cn) { 5728 TEST_REQUIRES_ARM_NEON_DOT; 5729 GemmMicrokernelTester() 5730 .mr(4) 5731 .nr(8) 5732 .kr(4) 5733 .sr(1) 5734 .m(4) 5735 .n(8) 5736 .k(8) 5737 .cn_stride(11) 5738 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5739 } 5740 
// k = 8, sweeping every (m, n) with m in [1, 4] and n in [1, 8]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// k = 8 and n = 8, sweeping m over [1, 4]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 8 and m = 4, sweeping n over [1, 8]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// m = 4, n = 8, sweeping k over [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,k_lt_8_subtile)5806 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_lt_8_subtile) { 5807 TEST_REQUIRES_ARM_NEON_DOT; 5808 for (size_t k = 1; k < 8; k++) { 5809 for (uint32_t n = 1; n <= 8; n++) { 5810 for (uint32_t m = 1; m <= 4; m++) { 5811 GemmMicrokernelTester() 5812 .mr(4) 5813 .nr(8) 5814 .kr(4) 5815 .sr(1) 5816 .m(m) 5817 .n(n) 5818 .k(k) 5819 .iterations(1) 5820 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5821 } 5822 } 5823 } 5824 } 5825 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,k_gt_8)5826 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_gt_8) { 5827 TEST_REQUIRES_ARM_NEON_DOT; 5828 for (size_t k = 9; k < 16; k++) { 5829 GemmMicrokernelTester() 5830 .mr(4) 5831 .nr(8) 5832 .kr(4) 5833 .sr(1) 5834 .m(4) 5835 .n(8) 5836 .k(k) 5837 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5838 } 5839 } 5840 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,k_gt_8_subtile)5841 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_gt_8_subtile) { 5842 TEST_REQUIRES_ARM_NEON_DOT; 5843 for (size_t k = 9; k < 16; k++) { 5844 for (uint32_t n = 1; n <= 8; n++) { 5845 for (uint32_t m = 1; m <= 4; m++) { 5846 GemmMicrokernelTester() 5847 .mr(4) 5848 .nr(8) 5849 .kr(4) 5850 .sr(1) 5851 .m(m) 5852 .n(n) 5853 .k(k) 5854 .iterations(1) 5855 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5856 } 5857 } 5858 } 5859 } 5860 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,k_div_8)5861 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_div_8) { 5862 TEST_REQUIRES_ARM_NEON_DOT; 5863 for (size_t k = 16; k <= 80; k += 8) { 5864 GemmMicrokernelTester() 5865 .mr(4) 5866 .nr(8) 5867 .kr(4) 5868 .sr(1) 5869 .m(4) 5870 .n(8) 5871 .k(k) 5872 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 5873 } 5874 } 5875 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,k_div_8_subtile)5876 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_div_8_subtile) { 5877 TEST_REQUIRES_ARM_NEON_DOT; 5878 for (size_t k = 16; k <= 80; k += 8) { 5879 for (uint32_t n = 1; n <= 8; n++) { 5880 for (uint32_t m = 1; m <= 4; m++) { 5881 GemmMicrokernelTester() 5882 .mr(4) 5883 .nr(8) 5884 .kr(4) 5885 .sr(1) 5886 .m(m) 5887 .n(n) 5888 .k(k) 5889 .iterations(1) 5890 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5891 } 5892 } 5893 } 5894 } 5895 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,n_gt_8)5896 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8) { 5897 TEST_REQUIRES_ARM_NEON_DOT; 5898 for (uint32_t n = 9; n < 16; n++) { 5899 for (size_t k = 1; k <= 40; k += 9) { 5900 GemmMicrokernelTester() 5901 .mr(4) 5902 .nr(8) 5903 .kr(4) 5904 .sr(1) 5905 .m(4) 5906 .n(n) 5907 .k(k) 5908 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5909 } 5910 } 5911 } 5912 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,n_gt_8_strided_cn)5913 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_strided_cn) { 5914 TEST_REQUIRES_ARM_NEON_DOT; 5915 for (uint32_t n = 9; n < 16; n++) { 5916 for (size_t k = 1; k <= 40; k += 9) { 5917 GemmMicrokernelTester() 5918 .mr(4) 5919 .nr(8) 5920 .kr(4) 5921 .sr(1) 5922 .m(4) 5923 .n(n) 5924 .k(k) 5925 .cn_stride(11) 5926 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5927 } 5928 } 5929 } 5930 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,n_gt_8_subtile)5931 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_subtile) { 5932 TEST_REQUIRES_ARM_NEON_DOT; 5933 for (uint32_t n = 9; n < 16; n++) { 5934 for (size_t k = 1; k <= 40; k += 9) { 5935 for (uint32_t m = 1; m <= 4; m++) { 5936 GemmMicrokernelTester() 5937 .mr(4) 5938 .nr(8) 
5939 .kr(4) 5940 .sr(1) 5941 .m(m) 5942 .n(n) 5943 .k(k) 5944 .iterations(1) 5945 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5946 } 5947 } 5948 } 5949 } 5950 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,n_div_8)5951 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8) { 5952 TEST_REQUIRES_ARM_NEON_DOT; 5953 for (uint32_t n = 16; n <= 24; n += 8) { 5954 for (size_t k = 1; k <= 40; k += 9) { 5955 GemmMicrokernelTester() 5956 .mr(4) 5957 .nr(8) 5958 .kr(4) 5959 .sr(1) 5960 .m(4) 5961 .n(n) 5962 .k(k) 5963 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5964 } 5965 } 5966 } 5967 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,n_div_8_strided_cn)5968 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_strided_cn) { 5969 TEST_REQUIRES_ARM_NEON_DOT; 5970 for (uint32_t n = 16; n <= 24; n += 8) { 5971 for (size_t k = 1; k <= 40; k += 9) { 5972 GemmMicrokernelTester() 5973 .mr(4) 5974 .nr(8) 5975 .kr(4) 5976 .sr(1) 5977 .m(4) 5978 .n(n) 5979 .k(k) 5980 .cn_stride(11) 5981 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 5982 } 5983 } 5984 } 5985 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,n_div_8_subtile)5986 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_subtile) { 5987 TEST_REQUIRES_ARM_NEON_DOT; 5988 for (uint32_t n = 16; n <= 24; n += 8) { 5989 for (size_t k = 1; k <= 40; k += 9) { 5990 for (uint32_t m = 1; m <= 4; m++) { 5991 GemmMicrokernelTester() 5992 .mr(4) 5993 .nr(8) 5994 .kr(4) 5995 .sr(1) 5996 .m(m) 5997 .n(n) 5998 .k(k) 5999 .iterations(1) 6000 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6001 } 6002 } 6003 } 6004 } 6005 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,small_kernel)6006 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, small_kernel) { 
6007 TEST_REQUIRES_ARM_NEON_DOT; 6008 for (size_t k = 1; k <= 40; k += 9) { 6009 GemmMicrokernelTester() 6010 .mr(4) 6011 .nr(8) 6012 .kr(4) 6013 .sr(1) 6014 .m(4) 6015 .n(8) 6016 .k(k) 6017 .ks(3) 6018 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6019 } 6020 } 6021 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,small_kernel_subtile)6022 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, small_kernel_subtile) { 6023 TEST_REQUIRES_ARM_NEON_DOT; 6024 for (size_t k = 1; k <= 40; k += 9) { 6025 for (uint32_t n = 1; n <= 8; n++) { 6026 for (uint32_t m = 1; m <= 4; m++) { 6027 GemmMicrokernelTester() 6028 .mr(4) 6029 .nr(8) 6030 .kr(4) 6031 .sr(1) 6032 .m(m) 6033 .n(n) 6034 .k(k) 6035 .ks(3) 6036 .iterations(1) 6037 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6038 } 6039 } 6040 } 6041 } 6042 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,n_gt_8_small_kernel)6043 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_small_kernel) { 6044 TEST_REQUIRES_ARM_NEON_DOT; 6045 for (uint32_t n = 9; n < 16; n++) { 6046 for (size_t k = 1; k <= 40; k += 9) { 6047 GemmMicrokernelTester() 6048 .mr(4) 6049 .nr(8) 6050 .kr(4) 6051 .sr(1) 6052 .m(4) 6053 .n(n) 6054 .k(k) 6055 .ks(3) 6056 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6057 } 6058 } 6059 } 6060 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,n_div_8_small_kernel)6061 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_small_kernel) { 6062 TEST_REQUIRES_ARM_NEON_DOT; 6063 for (uint32_t n = 16; n <= 24; n += 8) { 6064 for (size_t k = 1; k <= 40; k += 9) { 6065 GemmMicrokernelTester() 6066 .mr(4) 6067 .nr(8) 6068 .kr(4) 6069 .sr(1) 6070 .m(4) 6071 .n(n) 6072 .k(k) 6073 .ks(3) 6074 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 
6075 } 6076 } 6077 } 6078 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,strided_cm_subtile)6079 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cm_subtile) { 6080 TEST_REQUIRES_ARM_NEON_DOT; 6081 for (size_t k = 1; k <= 40; k += 9) { 6082 for (uint32_t n = 1; n <= 8; n++) { 6083 for (uint32_t m = 1; m <= 4; m++) { 6084 GemmMicrokernelTester() 6085 .mr(4) 6086 .nr(8) 6087 .kr(4) 6088 .sr(1) 6089 .m(m) 6090 .n(n) 6091 .k(k) 6092 .cm_stride(11) 6093 .iterations(1) 6094 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6095 } 6096 } 6097 } 6098 } 6099 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,a_offset)6100 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, a_offset) { 6101 TEST_REQUIRES_ARM_NEON_DOT; 6102 for (size_t k = 1; k <= 40; k += 9) { 6103 GemmMicrokernelTester() 6104 .mr(4) 6105 .nr(8) 6106 .kr(4) 6107 .sr(1) 6108 .m(4) 6109 .n(8) 6110 .k(k) 6111 .ks(3) 6112 .a_offset(163) 6113 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6114 } 6115 } 6116 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,zero)6117 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, zero) { 6118 TEST_REQUIRES_ARM_NEON_DOT; 6119 for (size_t k = 1; k <= 40; k += 9) { 6120 for (uint32_t mz = 0; mz < 4; mz++) { 6121 GemmMicrokernelTester() 6122 .mr(4) 6123 .nr(8) 6124 .kr(4) 6125 .sr(1) 6126 .m(4) 6127 .n(8) 6128 .k(k) 6129 .ks(3) 6130 .a_offset(163) 6131 .zero_index(mz) 6132 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6133 } 6134 } 6135 } 6136 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,qmin)6137 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, qmin) { 6138 TEST_REQUIRES_ARM_NEON_DOT; 6139 GemmMicrokernelTester() 6140 .mr(4) 6141 .nr(8) 6142 .kr(4) 6143 .sr(1) 6144 .m(4) 6145 .n(8) 6146 .k(8) 6147 .qmin(128) 6148 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6149 } 6150 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,qmax)6151 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, qmax) { 6152 TEST_REQUIRES_ARM_NEON_DOT; 6153 GemmMicrokernelTester() 6154 .mr(4) 6155 .nr(8) 6156 .kr(4) 6157 .sr(1) 6158 .m(4) 6159 .n(8) 6160 .k(8) 6161 .qmax(128) 6162 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6163 } 6164 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,strided_cm)6165 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cm) { 6166 TEST_REQUIRES_ARM_NEON_DOT; 6167 GemmMicrokernelTester() 6168 .mr(4) 6169 .nr(8) 6170 .kr(4) 6171 .sr(1) 6172 .m(4) 6173 .n(8) 6174 .k(8) 6175 .cm_stride(11) 6176 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6177 } 6178 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,no_a_zero_point)6179 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_a_zero_point) { 6180 TEST_REQUIRES_ARM_NEON_DOT; 6181 for (size_t k = 1; k <= 40; k += 9) { 6182 GemmMicrokernelTester() 6183 .mr(4) 6184 .nr(8) 6185 .kr(4) 6186 .sr(1) 6187 .m(4) 6188 .n(8) 6189 .k(k) 6190 .a_zero_point(0) 6191 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6192 } 6193 } 6194 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,no_b_zero_point)6195 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_b_zero_point) { 6196 TEST_REQUIRES_ARM_NEON_DOT; 6197 for (size_t k = 1; k <= 40; k += 9) { 6198 GemmMicrokernelTester() 6199 .mr(4) 6200 .nr(8) 6201 .kr(4) 6202 .sr(1) 6203 .m(4) 6204 .n(8) 6205 .k(k) 6206 .b_zero_point(0) 6207 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6208 } 6209 } 6210 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT,no_zero_point)6211 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, 
no_zero_point) { 6212 TEST_REQUIRES_ARM_NEON_DOT; 6213 for (size_t k = 1; k <= 40; k += 9) { 6214 GemmMicrokernelTester() 6215 .mr(4) 6216 .nr(8) 6217 .kr(4) 6218 .sr(1) 6219 .m(4) 6220 .n(8) 6221 .k(k) 6222 .a_zero_point(0) 6223 .b_zero_point(0) 6224 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6225 } 6226 } 6227 #endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 6228 6229 6230 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,k_eq_8)6231 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8) { 6232 TEST_REQUIRES_ARM_NEON_DOT; 6233 GemmMicrokernelTester() 6234 .mr(5) 6235 .nr(8) 6236 .kr(4) 6237 .sr(1) 6238 .m(5) 6239 .n(8) 6240 .k(8) 6241 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6242 } 6243 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,strided_cn)6244 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, strided_cn) { 6245 TEST_REQUIRES_ARM_NEON_DOT; 6246 GemmMicrokernelTester() 6247 .mr(5) 6248 .nr(8) 6249 .kr(4) 6250 .sr(1) 6251 .m(5) 6252 .n(8) 6253 .k(8) 6254 .cn_stride(11) 6255 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6256 } 6257 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,k_eq_8_subtile)6258 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8_subtile) { 6259 TEST_REQUIRES_ARM_NEON_DOT; 6260 for (uint32_t n = 1; n <= 8; n++) { 6261 for (uint32_t m = 1; m <= 5; m++) { 6262 GemmMicrokernelTester() 6263 .mr(5) 6264 .nr(8) 6265 .kr(4) 6266 .sr(1) 6267 .m(m) 6268 .n(n) 6269 .k(8) 6270 .iterations(1) 6271 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6272 } 6273 } 6274 } 6275 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,k_eq_8_subtile_m)6276 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, 
k_eq_8_subtile_m) { 6277 TEST_REQUIRES_ARM_NEON_DOT; 6278 for (uint32_t m = 1; m <= 5; m++) { 6279 GemmMicrokernelTester() 6280 .mr(5) 6281 .nr(8) 6282 .kr(4) 6283 .sr(1) 6284 .m(m) 6285 .n(8) 6286 .k(8) 6287 .iterations(1) 6288 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6289 } 6290 } 6291 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,k_eq_8_subtile_n)6292 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8_subtile_n) { 6293 TEST_REQUIRES_ARM_NEON_DOT; 6294 for (uint32_t n = 1; n <= 8; n++) { 6295 GemmMicrokernelTester() 6296 .mr(5) 6297 .nr(8) 6298 .kr(4) 6299 .sr(1) 6300 .m(5) 6301 .n(n) 6302 .k(8) 6303 .iterations(1) 6304 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6305 } 6306 } 6307 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,k_lt_8)6308 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_lt_8) { 6309 TEST_REQUIRES_ARM_NEON_DOT; 6310 for (size_t k = 1; k < 8; k++) { 6311 GemmMicrokernelTester() 6312 .mr(5) 6313 .nr(8) 6314 .kr(4) 6315 .sr(1) 6316 .m(5) 6317 .n(8) 6318 .k(k) 6319 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6320 } 6321 } 6322 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,k_lt_8_subtile)6323 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_lt_8_subtile) { 6324 TEST_REQUIRES_ARM_NEON_DOT; 6325 for (size_t k = 1; k < 8; k++) { 6326 for (uint32_t n = 1; n <= 8; n++) { 6327 for (uint32_t m = 1; m <= 5; m++) { 6328 GemmMicrokernelTester() 6329 .mr(5) 6330 .nr(8) 6331 .kr(4) 6332 .sr(1) 6333 .m(m) 6334 .n(n) 6335 .k(k) 6336 .iterations(1) 6337 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6338 } 6339 } 6340 } 6341 } 6342 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,k_gt_8)6343 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_gt_8) { 6344 
TEST_REQUIRES_ARM_NEON_DOT; 6345 for (size_t k = 9; k < 16; k++) { 6346 GemmMicrokernelTester() 6347 .mr(5) 6348 .nr(8) 6349 .kr(4) 6350 .sr(1) 6351 .m(5) 6352 .n(8) 6353 .k(k) 6354 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6355 } 6356 } 6357 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,k_gt_8_subtile)6358 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_gt_8_subtile) { 6359 TEST_REQUIRES_ARM_NEON_DOT; 6360 for (size_t k = 9; k < 16; k++) { 6361 for (uint32_t n = 1; n <= 8; n++) { 6362 for (uint32_t m = 1; m <= 5; m++) { 6363 GemmMicrokernelTester() 6364 .mr(5) 6365 .nr(8) 6366 .kr(4) 6367 .sr(1) 6368 .m(m) 6369 .n(n) 6370 .k(k) 6371 .iterations(1) 6372 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6373 } 6374 } 6375 } 6376 } 6377 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,k_div_8)6378 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_div_8) { 6379 TEST_REQUIRES_ARM_NEON_DOT; 6380 for (size_t k = 16; k <= 80; k += 8) { 6381 GemmMicrokernelTester() 6382 .mr(5) 6383 .nr(8) 6384 .kr(4) 6385 .sr(1) 6386 .m(5) 6387 .n(8) 6388 .k(k) 6389 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6390 } 6391 } 6392 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,k_div_8_subtile)6393 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_div_8_subtile) { 6394 TEST_REQUIRES_ARM_NEON_DOT; 6395 for (size_t k = 16; k <= 80; k += 8) { 6396 for (uint32_t n = 1; n <= 8; n++) { 6397 for (uint32_t m = 1; m <= 5; m++) { 6398 GemmMicrokernelTester() 6399 .mr(5) 6400 .nr(8) 6401 .kr(4) 6402 .sr(1) 6403 .m(m) 6404 .n(n) 6405 .k(k) 6406 .iterations(1) 6407 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6408 } 6409 } 6410 } 6411 } 6412 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,n_gt_8)6413 
// m = 5, n in [9, 15], k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in [9, 15], k = 1, 10, ..., 37, sweeping m over [1, 5]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 5, n = 16 and 24, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride set to 11.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 10, ..., 37, sweeping m over [1, 5]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 5, n = 8, k = 1, 10, ..., 37, with ks set to 3.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks = 3, k = 1, 10, ..., 37, sweeping every (m, n) with m in [1, 5] and n in [1, 8]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks = 3, m = 5, n in [9, 15], k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// ks = 3, m = 5, n = 16 and 24, k = 1, 10, ..., 37.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// cm_stride = 11, k = 1, 10, ..., 37, sweeping every (m, n) with m in [1, 5] and n in [1, 8]; one iteration per shape.
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,a_offset)6617 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, a_offset) { 6618 TEST_REQUIRES_ARM_NEON_DOT; 6619 for (size_t k = 1; k <= 40; k += 9) { 6620 GemmMicrokernelTester() 6621 .mr(5) 6622 .nr(8) 6623 .kr(4) 6624 .sr(1) 6625 .m(5) 6626 .n(8) 6627 .k(k) 6628 .ks(3) 6629 .a_offset(211) 6630 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6631 } 6632 } 6633 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,zero)6634 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, zero) { 6635 TEST_REQUIRES_ARM_NEON_DOT; 6636 for (size_t k = 1; k <= 40; k += 9) { 6637 for (uint32_t mz = 0; mz < 5; mz++) { 6638 GemmMicrokernelTester() 6639 .mr(5) 6640 .nr(8) 6641 .kr(4) 6642 .sr(1) 6643 .m(5) 6644 .n(8) 6645 .k(k) 6646 .ks(3) 6647 .a_offset(211) 6648 .zero_index(mz) 6649 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6650 } 6651 } 6652 } 6653 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,qmin)6654 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, qmin) { 6655 TEST_REQUIRES_ARM_NEON_DOT; 6656 GemmMicrokernelTester() 6657 .mr(5) 6658 .nr(8) 6659 .kr(4) 6660 .sr(1) 6661 .m(5) 6662 .n(8) 6663 .k(8) 6664 .qmin(128) 6665 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6666 } 6667 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,qmax)6668 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, qmax) { 6669 TEST_REQUIRES_ARM_NEON_DOT; 6670 GemmMicrokernelTester() 6671 .mr(5) 6672 .nr(8) 6673 .kr(4) 6674 .sr(1) 6675 .m(5) 6676 .n(8) 6677 .k(8) 6678 .qmax(128) 6679 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6680 } 6681 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,strided_cm)6682 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, strided_cm) { 6683 TEST_REQUIRES_ARM_NEON_DOT; 
6684 GemmMicrokernelTester() 6685 .mr(5) 6686 .nr(8) 6687 .kr(4) 6688 .sr(1) 6689 .m(5) 6690 .n(8) 6691 .k(8) 6692 .cm_stride(11) 6693 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6694 } 6695 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,no_a_zero_point)6696 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, no_a_zero_point) { 6697 TEST_REQUIRES_ARM_NEON_DOT; 6698 for (size_t k = 1; k <= 40; k += 9) { 6699 GemmMicrokernelTester() 6700 .mr(5) 6701 .nr(8) 6702 .kr(4) 6703 .sr(1) 6704 .m(5) 6705 .n(8) 6706 .k(k) 6707 .a_zero_point(0) 6708 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6709 } 6710 } 6711 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,no_b_zero_point)6712 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, no_b_zero_point) { 6713 TEST_REQUIRES_ARM_NEON_DOT; 6714 for (size_t k = 1; k <= 40; k += 9) { 6715 GemmMicrokernelTester() 6716 .mr(5) 6717 .nr(8) 6718 .kr(4) 6719 .sr(1) 6720 .m(5) 6721 .n(8) 6722 .k(k) 6723 .b_zero_point(0) 6724 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6725 } 6726 } 6727 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT,no_zero_point)6728 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, no_zero_point) { 6729 TEST_REQUIRES_ARM_NEON_DOT; 6730 for (size_t k = 1; k <= 40; k += 9) { 6731 GemmMicrokernelTester() 6732 .mr(5) 6733 .nr(8) 6734 .kr(4) 6735 .sr(1) 6736 .m(5) 6737 .n(8) 6738 .k(k) 6739 .a_zero_point(0) 6740 .b_zero_point(0) 6741 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6742 } 6743 } 6744 #endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 6745 6746 6747 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,k_eq_8)6748 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8) { 
6749 TEST_REQUIRES_ARM_NEON_DOT; 6750 GemmMicrokernelTester() 6751 .mr(1) 6752 .nr(16) 6753 .kr(4) 6754 .sr(1) 6755 .m(1) 6756 .n(16) 6757 .k(8) 6758 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6759 } 6760 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,strided_cn)6761 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cn) { 6762 TEST_REQUIRES_ARM_NEON_DOT; 6763 GemmMicrokernelTester() 6764 .mr(1) 6765 .nr(16) 6766 .kr(4) 6767 .sr(1) 6768 .m(1) 6769 .n(16) 6770 .k(8) 6771 .cn_stride(19) 6772 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6773 } 6774 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,k_eq_8_subtile)6775 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile) { 6776 TEST_REQUIRES_ARM_NEON_DOT; 6777 for (uint32_t n = 1; n <= 16; n++) { 6778 for (uint32_t m = 1; m <= 1; m++) { 6779 GemmMicrokernelTester() 6780 .mr(1) 6781 .nr(16) 6782 .kr(4) 6783 .sr(1) 6784 .m(m) 6785 .n(n) 6786 .k(8) 6787 .iterations(1) 6788 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6789 } 6790 } 6791 } 6792 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,k_eq_8_subtile_m)6793 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile_m) { 6794 TEST_REQUIRES_ARM_NEON_DOT; 6795 for (uint32_t m = 1; m <= 1; m++) { 6796 GemmMicrokernelTester() 6797 .mr(1) 6798 .nr(16) 6799 .kr(4) 6800 .sr(1) 6801 .m(m) 6802 .n(16) 6803 .k(8) 6804 .iterations(1) 6805 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6806 } 6807 } 6808 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,k_eq_8_subtile_n)6809 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile_n) { 6810 TEST_REQUIRES_ARM_NEON_DOT; 6811 for (uint32_t n = 1; n <= 16; n++) { 6812 GemmMicrokernelTester() 6813 .mr(1) 6814 
.nr(16) 6815 .kr(4) 6816 .sr(1) 6817 .m(1) 6818 .n(n) 6819 .k(8) 6820 .iterations(1) 6821 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6822 } 6823 } 6824 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,k_lt_8)6825 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_lt_8) { 6826 TEST_REQUIRES_ARM_NEON_DOT; 6827 for (size_t k = 1; k < 8; k++) { 6828 GemmMicrokernelTester() 6829 .mr(1) 6830 .nr(16) 6831 .kr(4) 6832 .sr(1) 6833 .m(1) 6834 .n(16) 6835 .k(k) 6836 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6837 } 6838 } 6839 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,k_lt_8_subtile)6840 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_lt_8_subtile) { 6841 TEST_REQUIRES_ARM_NEON_DOT; 6842 for (size_t k = 1; k < 8; k++) { 6843 for (uint32_t n = 1; n <= 16; n++) { 6844 for (uint32_t m = 1; m <= 1; m++) { 6845 GemmMicrokernelTester() 6846 .mr(1) 6847 .nr(16) 6848 .kr(4) 6849 .sr(1) 6850 .m(m) 6851 .n(n) 6852 .k(k) 6853 .iterations(1) 6854 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6855 } 6856 } 6857 } 6858 } 6859 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,k_gt_8)6860 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_gt_8) { 6861 TEST_REQUIRES_ARM_NEON_DOT; 6862 for (size_t k = 9; k < 16; k++) { 6863 GemmMicrokernelTester() 6864 .mr(1) 6865 .nr(16) 6866 .kr(4) 6867 .sr(1) 6868 .m(1) 6869 .n(16) 6870 .k(k) 6871 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6872 } 6873 } 6874 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,k_gt_8_subtile)6875 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_gt_8_subtile) { 6876 TEST_REQUIRES_ARM_NEON_DOT; 6877 for (size_t k = 9; k < 16; k++) { 6878 for (uint32_t n = 1; n <= 16; n++) { 6879 for (uint32_t m = 1; m <= 1; m++) { 6880 
GemmMicrokernelTester() 6881 .mr(1) 6882 .nr(16) 6883 .kr(4) 6884 .sr(1) 6885 .m(m) 6886 .n(n) 6887 .k(k) 6888 .iterations(1) 6889 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6890 } 6891 } 6892 } 6893 } 6894 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,k_div_8)6895 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_div_8) { 6896 TEST_REQUIRES_ARM_NEON_DOT; 6897 for (size_t k = 16; k <= 80; k += 8) { 6898 GemmMicrokernelTester() 6899 .mr(1) 6900 .nr(16) 6901 .kr(4) 6902 .sr(1) 6903 .m(1) 6904 .n(16) 6905 .k(k) 6906 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6907 } 6908 } 6909 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,k_div_8_subtile)6910 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_div_8_subtile) { 6911 TEST_REQUIRES_ARM_NEON_DOT; 6912 for (size_t k = 16; k <= 80; k += 8) { 6913 for (uint32_t n = 1; n <= 16; n++) { 6914 for (uint32_t m = 1; m <= 1; m++) { 6915 GemmMicrokernelTester() 6916 .mr(1) 6917 .nr(16) 6918 .kr(4) 6919 .sr(1) 6920 .m(m) 6921 .n(n) 6922 .k(k) 6923 .iterations(1) 6924 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6925 } 6926 } 6927 } 6928 } 6929 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,n_gt_16)6930 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16) { 6931 TEST_REQUIRES_ARM_NEON_DOT; 6932 for (uint32_t n = 17; n < 32; n++) { 6933 for (size_t k = 1; k <= 40; k += 9) { 6934 GemmMicrokernelTester() 6935 .mr(1) 6936 .nr(16) 6937 .kr(4) 6938 .sr(1) 6939 .m(1) 6940 .n(n) 6941 .k(k) 6942 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6943 } 6944 } 6945 } 6946 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,n_gt_16_strided_cn)6947 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_strided_cn) { 6948 
TEST_REQUIRES_ARM_NEON_DOT; 6949 for (uint32_t n = 17; n < 32; n++) { 6950 for (size_t k = 1; k <= 40; k += 9) { 6951 GemmMicrokernelTester() 6952 .mr(1) 6953 .nr(16) 6954 .kr(4) 6955 .sr(1) 6956 .m(1) 6957 .n(n) 6958 .k(k) 6959 .cn_stride(19) 6960 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6961 } 6962 } 6963 } 6964 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,n_gt_16_subtile)6965 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_subtile) { 6966 TEST_REQUIRES_ARM_NEON_DOT; 6967 for (uint32_t n = 17; n < 32; n++) { 6968 for (size_t k = 1; k <= 40; k += 9) { 6969 for (uint32_t m = 1; m <= 1; m++) { 6970 GemmMicrokernelTester() 6971 .mr(1) 6972 .nr(16) 6973 .kr(4) 6974 .sr(1) 6975 .m(m) 6976 .n(n) 6977 .k(k) 6978 .iterations(1) 6979 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6980 } 6981 } 6982 } 6983 } 6984 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,n_div_16)6985 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16) { 6986 TEST_REQUIRES_ARM_NEON_DOT; 6987 for (uint32_t n = 32; n <= 48; n += 16) { 6988 for (size_t k = 1; k <= 40; k += 9) { 6989 GemmMicrokernelTester() 6990 .mr(1) 6991 .nr(16) 6992 .kr(4) 6993 .sr(1) 6994 .m(1) 6995 .n(n) 6996 .k(k) 6997 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 6998 } 6999 } 7000 } 7001 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,n_div_16_strided_cn)7002 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_strided_cn) { 7003 TEST_REQUIRES_ARM_NEON_DOT; 7004 for (uint32_t n = 32; n <= 48; n += 16) { 7005 for (size_t k = 1; k <= 40; k += 9) { 7006 GemmMicrokernelTester() 7007 .mr(1) 7008 .nr(16) 7009 .kr(4) 7010 .sr(1) 7011 .m(1) 7012 .n(n) 7013 .k(k) 7014 .cn_stride(19) 7015 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 7016 } 7017 } 7018 } 7019 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,n_div_16_subtile)7020 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_subtile) { 7021 TEST_REQUIRES_ARM_NEON_DOT; 7022 for (uint32_t n = 32; n <= 48; n += 16) { 7023 for (size_t k = 1; k <= 40; k += 9) { 7024 for (uint32_t m = 1; m <= 1; m++) { 7025 GemmMicrokernelTester() 7026 .mr(1) 7027 .nr(16) 7028 .kr(4) 7029 .sr(1) 7030 .m(m) 7031 .n(n) 7032 .k(k) 7033 .iterations(1) 7034 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7035 } 7036 } 7037 } 7038 } 7039 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,small_kernel)7040 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, small_kernel) { 7041 TEST_REQUIRES_ARM_NEON_DOT; 7042 for (size_t k = 1; k <= 40; k += 9) { 7043 GemmMicrokernelTester() 7044 .mr(1) 7045 .nr(16) 7046 .kr(4) 7047 .sr(1) 7048 .m(1) 7049 .n(16) 7050 .k(k) 7051 .ks(3) 7052 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7053 } 7054 } 7055 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,small_kernel_subtile)7056 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, small_kernel_subtile) { 7057 TEST_REQUIRES_ARM_NEON_DOT; 7058 for (size_t k = 1; k <= 40; k += 9) { 7059 for (uint32_t n = 1; n <= 16; n++) { 7060 for (uint32_t m = 1; m <= 1; m++) { 7061 GemmMicrokernelTester() 7062 .mr(1) 7063 .nr(16) 7064 .kr(4) 7065 .sr(1) 7066 .m(m) 7067 .n(n) 7068 .k(k) 7069 .ks(3) 7070 .iterations(1) 7071 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7072 } 7073 } 7074 } 7075 } 7076 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,n_gt_16_small_kernel)7077 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_small_kernel) { 7078 TEST_REQUIRES_ARM_NEON_DOT; 7079 for (uint32_t n = 17; n < 32; n++) { 7080 for (size_t k = 1; k <= 40; k += 9) { 7081 
GemmMicrokernelTester() 7082 .mr(1) 7083 .nr(16) 7084 .kr(4) 7085 .sr(1) 7086 .m(1) 7087 .n(n) 7088 .k(k) 7089 .ks(3) 7090 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7091 } 7092 } 7093 } 7094 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,n_div_16_small_kernel)7095 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_small_kernel) { 7096 TEST_REQUIRES_ARM_NEON_DOT; 7097 for (uint32_t n = 32; n <= 48; n += 16) { 7098 for (size_t k = 1; k <= 40; k += 9) { 7099 GemmMicrokernelTester() 7100 .mr(1) 7101 .nr(16) 7102 .kr(4) 7103 .sr(1) 7104 .m(1) 7105 .n(n) 7106 .k(k) 7107 .ks(3) 7108 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7109 } 7110 } 7111 } 7112 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,strided_cm_subtile)7113 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cm_subtile) { 7114 TEST_REQUIRES_ARM_NEON_DOT; 7115 for (size_t k = 1; k <= 40; k += 9) { 7116 for (uint32_t n = 1; n <= 16; n++) { 7117 for (uint32_t m = 1; m <= 1; m++) { 7118 GemmMicrokernelTester() 7119 .mr(1) 7120 .nr(16) 7121 .kr(4) 7122 .sr(1) 7123 .m(m) 7124 .n(n) 7125 .k(k) 7126 .cm_stride(19) 7127 .iterations(1) 7128 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7129 } 7130 } 7131 } 7132 } 7133 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,a_offset)7134 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, a_offset) { 7135 TEST_REQUIRES_ARM_NEON_DOT; 7136 for (size_t k = 1; k <= 40; k += 9) { 7137 GemmMicrokernelTester() 7138 .mr(1) 7139 .nr(16) 7140 .kr(4) 7141 .sr(1) 7142 .m(1) 7143 .n(16) 7144 .k(k) 7145 .ks(3) 7146 .a_offset(43) 7147 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7148 } 7149 } 7150 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,zero)7151 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, zero) { 7152 TEST_REQUIRES_ARM_NEON_DOT; 7153 for (size_t k = 1; k <= 40; k += 9) { 7154 for (uint32_t mz = 0; mz < 1; mz++) { 7155 GemmMicrokernelTester() 7156 .mr(1) 7157 .nr(16) 7158 .kr(4) 7159 .sr(1) 7160 .m(1) 7161 .n(16) 7162 .k(k) 7163 .ks(3) 7164 .a_offset(43) 7165 .zero_index(mz) 7166 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7167 } 7168 } 7169 } 7170 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,qmin)7171 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, qmin) { 7172 TEST_REQUIRES_ARM_NEON_DOT; 7173 GemmMicrokernelTester() 7174 .mr(1) 7175 .nr(16) 7176 .kr(4) 7177 .sr(1) 7178 .m(1) 7179 .n(16) 7180 .k(8) 7181 .qmin(128) 7182 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7183 } 7184 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,qmax)7185 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, qmax) { 7186 TEST_REQUIRES_ARM_NEON_DOT; 7187 GemmMicrokernelTester() 7188 .mr(1) 7189 .nr(16) 7190 .kr(4) 7191 .sr(1) 7192 .m(1) 7193 .n(16) 7194 .k(8) 7195 .qmax(128) 7196 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7197 } 7198 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,strided_cm)7199 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cm) { 7200 TEST_REQUIRES_ARM_NEON_DOT; 7201 GemmMicrokernelTester() 7202 .mr(1) 7203 .nr(16) 7204 .kr(4) 7205 .sr(1) 7206 .m(1) 7207 .n(16) 7208 .k(8) 7209 .cm_stride(19) 7210 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7211 } 7212 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,no_a_zero_point)7213 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_a_zero_point) { 7214 TEST_REQUIRES_ARM_NEON_DOT; 7215 for (size_t k = 1; k <= 40; k += 9) { 7216 GemmMicrokernelTester() 7217 .mr(1) 
7218 .nr(16) 7219 .kr(4) 7220 .sr(1) 7221 .m(1) 7222 .n(16) 7223 .k(k) 7224 .a_zero_point(0) 7225 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7226 } 7227 } 7228 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,no_b_zero_point)7229 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_b_zero_point) { 7230 TEST_REQUIRES_ARM_NEON_DOT; 7231 for (size_t k = 1; k <= 40; k += 9) { 7232 GemmMicrokernelTester() 7233 .mr(1) 7234 .nr(16) 7235 .kr(4) 7236 .sr(1) 7237 .m(1) 7238 .n(16) 7239 .k(k) 7240 .b_zero_point(0) 7241 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7242 } 7243 } 7244 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT,no_zero_point)7245 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_zero_point) { 7246 TEST_REQUIRES_ARM_NEON_DOT; 7247 for (size_t k = 1; k <= 40; k += 9) { 7248 GemmMicrokernelTester() 7249 .mr(1) 7250 .nr(16) 7251 .kr(4) 7252 .sr(1) 7253 .m(1) 7254 .n(16) 7255 .k(k) 7256 .a_zero_point(0) 7257 .b_zero_point(0) 7258 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7259 } 7260 } 7261 #endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 7262 7263 7264 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_eq_8)7265 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8) { 7266 TEST_REQUIRES_ARM_NEON_DOT; 7267 GemmMicrokernelTester() 7268 .mr(6) 7269 .nr(16) 7270 .kr(4) 7271 .sr(1) 7272 .m(6) 7273 .n(16) 7274 .k(8) 7275 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7276 } 7277 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,strided_cn)7278 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cn) { 7279 TEST_REQUIRES_ARM_NEON_DOT; 7280 GemmMicrokernelTester() 7281 .mr(6) 7282 .nr(16) 7283 
.kr(4) 7284 .sr(1) 7285 .m(6) 7286 .n(16) 7287 .k(8) 7288 .cn_stride(19) 7289 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7290 } 7291 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_eq_8_subtile)7292 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile) { 7293 TEST_REQUIRES_ARM_NEON_DOT; 7294 for (uint32_t n = 1; n <= 16; n++) { 7295 for (uint32_t m = 1; m <= 6; m++) { 7296 GemmMicrokernelTester() 7297 .mr(6) 7298 .nr(16) 7299 .kr(4) 7300 .sr(1) 7301 .m(m) 7302 .n(n) 7303 .k(8) 7304 .iterations(1) 7305 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7306 } 7307 } 7308 } 7309 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_eq_8_subtile_m)7310 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_m) { 7311 TEST_REQUIRES_ARM_NEON_DOT; 7312 for (uint32_t m = 1; m <= 6; m++) { 7313 GemmMicrokernelTester() 7314 .mr(6) 7315 .nr(16) 7316 .kr(4) 7317 .sr(1) 7318 .m(m) 7319 .n(16) 7320 .k(8) 7321 .iterations(1) 7322 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7323 } 7324 } 7325 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_eq_8_subtile_n)7326 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_n) { 7327 TEST_REQUIRES_ARM_NEON_DOT; 7328 for (uint32_t n = 1; n <= 16; n++) { 7329 GemmMicrokernelTester() 7330 .mr(6) 7331 .nr(16) 7332 .kr(4) 7333 .sr(1) 7334 .m(6) 7335 .n(n) 7336 .k(8) 7337 .iterations(1) 7338 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7339 } 7340 } 7341 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_lt_8)7342 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8) { 7343 TEST_REQUIRES_ARM_NEON_DOT; 7344 for (size_t k = 1; k < 8; k++) { 7345 GemmMicrokernelTester() 7346 .mr(6) 7347 .nr(16) 7348 .kr(4) 7349 .sr(1) 7350 
.m(6) 7351 .n(16) 7352 .k(k) 7353 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7354 } 7355 } 7356 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_lt_8_subtile)7357 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8_subtile) { 7358 TEST_REQUIRES_ARM_NEON_DOT; 7359 for (size_t k = 1; k < 8; k++) { 7360 for (uint32_t n = 1; n <= 16; n++) { 7361 for (uint32_t m = 1; m <= 6; m++) { 7362 GemmMicrokernelTester() 7363 .mr(6) 7364 .nr(16) 7365 .kr(4) 7366 .sr(1) 7367 .m(m) 7368 .n(n) 7369 .k(k) 7370 .iterations(1) 7371 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7372 } 7373 } 7374 } 7375 } 7376 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_gt_8)7377 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8) { 7378 TEST_REQUIRES_ARM_NEON_DOT; 7379 for (size_t k = 9; k < 16; k++) { 7380 GemmMicrokernelTester() 7381 .mr(6) 7382 .nr(16) 7383 .kr(4) 7384 .sr(1) 7385 .m(6) 7386 .n(16) 7387 .k(k) 7388 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7389 } 7390 } 7391 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_gt_8_subtile)7392 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8_subtile) { 7393 TEST_REQUIRES_ARM_NEON_DOT; 7394 for (size_t k = 9; k < 16; k++) { 7395 for (uint32_t n = 1; n <= 16; n++) { 7396 for (uint32_t m = 1; m <= 6; m++) { 7397 GemmMicrokernelTester() 7398 .mr(6) 7399 .nr(16) 7400 .kr(4) 7401 .sr(1) 7402 .m(m) 7403 .n(n) 7404 .k(k) 7405 .iterations(1) 7406 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7407 } 7408 } 7409 } 7410 } 7411 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_div_8)7412 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8) { 7413 TEST_REQUIRES_ARM_NEON_DOT; 7414 for (size_t k = 16; k <= 80; k += 8) { 7415 
GemmMicrokernelTester() 7416 .mr(6) 7417 .nr(16) 7418 .kr(4) 7419 .sr(1) 7420 .m(6) 7421 .n(16) 7422 .k(k) 7423 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7424 } 7425 } 7426 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_div_8_subtile)7427 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8_subtile) { 7428 TEST_REQUIRES_ARM_NEON_DOT; 7429 for (size_t k = 16; k <= 80; k += 8) { 7430 for (uint32_t n = 1; n <= 16; n++) { 7431 for (uint32_t m = 1; m <= 6; m++) { 7432 GemmMicrokernelTester() 7433 .mr(6) 7434 .nr(16) 7435 .kr(4) 7436 .sr(1) 7437 .m(m) 7438 .n(n) 7439 .k(k) 7440 .iterations(1) 7441 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7442 } 7443 } 7444 } 7445 } 7446 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_gt_16)7447 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16) { 7448 TEST_REQUIRES_ARM_NEON_DOT; 7449 for (uint32_t n = 17; n < 32; n++) { 7450 for (size_t k = 1; k <= 40; k += 9) { 7451 GemmMicrokernelTester() 7452 .mr(6) 7453 .nr(16) 7454 .kr(4) 7455 .sr(1) 7456 .m(6) 7457 .n(n) 7458 .k(k) 7459 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7460 } 7461 } 7462 } 7463 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_gt_16_strided_cn)7464 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_strided_cn) { 7465 TEST_REQUIRES_ARM_NEON_DOT; 7466 for (uint32_t n = 17; n < 32; n++) { 7467 for (size_t k = 1; k <= 40; k += 9) { 7468 GemmMicrokernelTester() 7469 .mr(6) 7470 .nr(16) 7471 .kr(4) 7472 .sr(1) 7473 .m(6) 7474 .n(n) 7475 .k(k) 7476 .cn_stride(19) 7477 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7478 } 7479 } 7480 } 7481 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_gt_16_subtile)7482 
TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_subtile) { 7483 TEST_REQUIRES_ARM_NEON_DOT; 7484 for (uint32_t n = 17; n < 32; n++) { 7485 for (size_t k = 1; k <= 40; k += 9) { 7486 for (uint32_t m = 1; m <= 6; m++) { 7487 GemmMicrokernelTester() 7488 .mr(6) 7489 .nr(16) 7490 .kr(4) 7491 .sr(1) 7492 .m(m) 7493 .n(n) 7494 .k(k) 7495 .iterations(1) 7496 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7497 } 7498 } 7499 } 7500 } 7501 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_div_16)7502 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16) { 7503 TEST_REQUIRES_ARM_NEON_DOT; 7504 for (uint32_t n = 32; n <= 48; n += 16) { 7505 for (size_t k = 1; k <= 40; k += 9) { 7506 GemmMicrokernelTester() 7507 .mr(6) 7508 .nr(16) 7509 .kr(4) 7510 .sr(1) 7511 .m(6) 7512 .n(n) 7513 .k(k) 7514 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7515 } 7516 } 7517 } 7518 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_div_16_strided_cn)7519 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_strided_cn) { 7520 TEST_REQUIRES_ARM_NEON_DOT; 7521 for (uint32_t n = 32; n <= 48; n += 16) { 7522 for (size_t k = 1; k <= 40; k += 9) { 7523 GemmMicrokernelTester() 7524 .mr(6) 7525 .nr(16) 7526 .kr(4) 7527 .sr(1) 7528 .m(6) 7529 .n(n) 7530 .k(k) 7531 .cn_stride(19) 7532 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7533 } 7534 } 7535 } 7536 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_div_16_subtile)7537 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_subtile) { 7538 TEST_REQUIRES_ARM_NEON_DOT; 7539 for (uint32_t n = 32; n <= 48; n += 16) { 7540 for (size_t k = 1; k <= 40; k += 9) { 7541 for (uint32_t m = 1; m <= 6; m++) { 7542 GemmMicrokernelTester() 7543 .mr(6) 7544 .nr(16) 7545 .kr(4) 7546 .sr(1) 7547 .m(m) 7548 .n(n) 7549 .k(k) 7550 
.iterations(1) 7551 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7552 } 7553 } 7554 } 7555 } 7556 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,small_kernel)7557 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, small_kernel) { 7558 TEST_REQUIRES_ARM_NEON_DOT; 7559 for (size_t k = 1; k <= 40; k += 9) { 7560 GemmMicrokernelTester() 7561 .mr(6) 7562 .nr(16) 7563 .kr(4) 7564 .sr(1) 7565 .m(6) 7566 .n(16) 7567 .k(k) 7568 .ks(3) 7569 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7570 } 7571 } 7572 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,small_kernel_subtile)7573 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, small_kernel_subtile) { 7574 TEST_REQUIRES_ARM_NEON_DOT; 7575 for (size_t k = 1; k <= 40; k += 9) { 7576 for (uint32_t n = 1; n <= 16; n++) { 7577 for (uint32_t m = 1; m <= 6; m++) { 7578 GemmMicrokernelTester() 7579 .mr(6) 7580 .nr(16) 7581 .kr(4) 7582 .sr(1) 7583 .m(m) 7584 .n(n) 7585 .k(k) 7586 .ks(3) 7587 .iterations(1) 7588 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7589 } 7590 } 7591 } 7592 } 7593 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_gt_16_small_kernel)7594 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_small_kernel) { 7595 TEST_REQUIRES_ARM_NEON_DOT; 7596 for (uint32_t n = 17; n < 32; n++) { 7597 for (size_t k = 1; k <= 40; k += 9) { 7598 GemmMicrokernelTester() 7599 .mr(6) 7600 .nr(16) 7601 .kr(4) 7602 .sr(1) 7603 .m(6) 7604 .n(n) 7605 .k(k) 7606 .ks(3) 7607 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7608 } 7609 } 7610 } 7611 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_div_16_small_kernel)7612 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_small_kernel) { 7613 TEST_REQUIRES_ARM_NEON_DOT; 7614 for (uint32_t 
n = 32; n <= 48; n += 16) { 7615 for (size_t k = 1; k <= 40; k += 9) { 7616 GemmMicrokernelTester() 7617 .mr(6) 7618 .nr(16) 7619 .kr(4) 7620 .sr(1) 7621 .m(6) 7622 .n(n) 7623 .k(k) 7624 .ks(3) 7625 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7626 } 7627 } 7628 } 7629 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,strided_cm_subtile)7630 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm_subtile) { 7631 TEST_REQUIRES_ARM_NEON_DOT; 7632 for (size_t k = 1; k <= 40; k += 9) { 7633 for (uint32_t n = 1; n <= 16; n++) { 7634 for (uint32_t m = 1; m <= 6; m++) { 7635 GemmMicrokernelTester() 7636 .mr(6) 7637 .nr(16) 7638 .kr(4) 7639 .sr(1) 7640 .m(m) 7641 .n(n) 7642 .k(k) 7643 .cm_stride(19) 7644 .iterations(1) 7645 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7646 } 7647 } 7648 } 7649 } 7650 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,a_offset)7651 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, a_offset) { 7652 TEST_REQUIRES_ARM_NEON_DOT; 7653 for (size_t k = 1; k <= 40; k += 9) { 7654 GemmMicrokernelTester() 7655 .mr(6) 7656 .nr(16) 7657 .kr(4) 7658 .sr(1) 7659 .m(6) 7660 .n(16) 7661 .k(k) 7662 .ks(3) 7663 .a_offset(251) 7664 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7665 } 7666 } 7667 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,zero)7668 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, zero) { 7669 TEST_REQUIRES_ARM_NEON_DOT; 7670 for (size_t k = 1; k <= 40; k += 9) { 7671 for (uint32_t mz = 0; mz < 6; mz++) { 7672 GemmMicrokernelTester() 7673 .mr(6) 7674 .nr(16) 7675 .kr(4) 7676 .sr(1) 7677 .m(6) 7678 .n(16) 7679 .k(k) 7680 .ks(3) 7681 .a_offset(251) 7682 .zero_index(mz) 7683 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7684 } 7685 } 7686 } 
7687 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,qmin)7688 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmin) { 7689 TEST_REQUIRES_ARM_NEON_DOT; 7690 GemmMicrokernelTester() 7691 .mr(6) 7692 .nr(16) 7693 .kr(4) 7694 .sr(1) 7695 .m(6) 7696 .n(16) 7697 .k(8) 7698 .qmin(128) 7699 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7700 } 7701 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,qmax)7702 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmax) { 7703 TEST_REQUIRES_ARM_NEON_DOT; 7704 GemmMicrokernelTester() 7705 .mr(6) 7706 .nr(16) 7707 .kr(4) 7708 .sr(1) 7709 .m(6) 7710 .n(16) 7711 .k(8) 7712 .qmax(128) 7713 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7714 } 7715 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,strided_cm)7716 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm) { 7717 TEST_REQUIRES_ARM_NEON_DOT; 7718 GemmMicrokernelTester() 7719 .mr(6) 7720 .nr(16) 7721 .kr(4) 7722 .sr(1) 7723 .m(6) 7724 .n(16) 7725 .k(8) 7726 .cm_stride(19) 7727 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7728 } 7729 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,no_a_zero_point)7730 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_a_zero_point) { 7731 TEST_REQUIRES_ARM_NEON_DOT; 7732 for (size_t k = 1; k <= 40; k += 9) { 7733 GemmMicrokernelTester() 7734 .mr(6) 7735 .nr(16) 7736 .kr(4) 7737 .sr(1) 7738 .m(6) 7739 .n(16) 7740 .k(k) 7741 .a_zero_point(0) 7742 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7743 } 7744 } 7745 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,no_b_zero_point)7746 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_b_zero_point) { 7747 TEST_REQUIRES_ARM_NEON_DOT; 7748 for (size_t k = 1; k <= 40; k += 9) { 7749 GemmMicrokernelTester() 7750 
.mr(6) 7751 .nr(16) 7752 .kr(4) 7753 .sr(1) 7754 .m(6) 7755 .n(16) 7756 .k(k) 7757 .b_zero_point(0) 7758 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7759 } 7760 } 7761 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,no_zero_point)7762 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_zero_point) { 7763 TEST_REQUIRES_ARM_NEON_DOT; 7764 for (size_t k = 1; k <= 40; k += 9) { 7765 GemmMicrokernelTester() 7766 .mr(6) 7767 .nr(16) 7768 .kr(4) 7769 .sr(1) 7770 .m(6) 7771 .n(16) 7772 .k(k) 7773 .a_zero_point(0) 7774 .b_zero_point(0) 7775 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7776 } 7777 } 7778 #endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 7779 7780 7781 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,k_eq_8)7782 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8) { 7783 TEST_REQUIRES_ARM_NEON_DOT; 7784 GemmMicrokernelTester() 7785 .mr(1) 7786 .nr(32) 7787 .kr(4) 7788 .sr(1) 7789 .m(1) 7790 .n(32) 7791 .k(8) 7792 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7793 } 7794 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,strided_cn)7795 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, strided_cn) { 7796 TEST_REQUIRES_ARM_NEON_DOT; 7797 GemmMicrokernelTester() 7798 .mr(1) 7799 .nr(32) 7800 .kr(4) 7801 .sr(1) 7802 .m(1) 7803 .n(32) 7804 .k(8) 7805 .cn_stride(37) 7806 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7807 } 7808 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,k_eq_8_subtile)7809 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8_subtile) { 7810 TEST_REQUIRES_ARM_NEON_DOT; 7811 for (uint32_t n = 1; n <= 32; n++) { 7812 for (uint32_t m = 1; m <= 1; m++) { 7813 GemmMicrokernelTester() 
7814 .mr(1) 7815 .nr(32) 7816 .kr(4) 7817 .sr(1) 7818 .m(m) 7819 .n(n) 7820 .k(8) 7821 .iterations(1) 7822 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7823 } 7824 } 7825 } 7826 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,k_eq_8_subtile_m)7827 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8_subtile_m) { 7828 TEST_REQUIRES_ARM_NEON_DOT; 7829 for (uint32_t m = 1; m <= 1; m++) { 7830 GemmMicrokernelTester() 7831 .mr(1) 7832 .nr(32) 7833 .kr(4) 7834 .sr(1) 7835 .m(m) 7836 .n(32) 7837 .k(8) 7838 .iterations(1) 7839 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7840 } 7841 } 7842 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,k_eq_8_subtile_n)7843 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8_subtile_n) { 7844 TEST_REQUIRES_ARM_NEON_DOT; 7845 for (uint32_t n = 1; n <= 32; n++) { 7846 GemmMicrokernelTester() 7847 .mr(1) 7848 .nr(32) 7849 .kr(4) 7850 .sr(1) 7851 .m(1) 7852 .n(n) 7853 .k(8) 7854 .iterations(1) 7855 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7856 } 7857 } 7858 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,k_lt_8)7859 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_lt_8) { 7860 TEST_REQUIRES_ARM_NEON_DOT; 7861 for (size_t k = 1; k < 8; k++) { 7862 GemmMicrokernelTester() 7863 .mr(1) 7864 .nr(32) 7865 .kr(4) 7866 .sr(1) 7867 .m(1) 7868 .n(32) 7869 .k(k) 7870 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7871 } 7872 } 7873 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,k_lt_8_subtile)7874 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_lt_8_subtile) { 7875 TEST_REQUIRES_ARM_NEON_DOT; 7876 for (size_t k = 1; k < 8; k++) { 7877 for (uint32_t n = 1; n <= 32; n++) { 7878 for (uint32_t m = 1; m <= 1; m++) { 7879 GemmMicrokernelTester() 
7880 .mr(1) 7881 .nr(32) 7882 .kr(4) 7883 .sr(1) 7884 .m(m) 7885 .n(n) 7886 .k(k) 7887 .iterations(1) 7888 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7889 } 7890 } 7891 } 7892 } 7893 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,k_gt_8)7894 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_gt_8) { 7895 TEST_REQUIRES_ARM_NEON_DOT; 7896 for (size_t k = 9; k < 16; k++) { 7897 GemmMicrokernelTester() 7898 .mr(1) 7899 .nr(32) 7900 .kr(4) 7901 .sr(1) 7902 .m(1) 7903 .n(32) 7904 .k(k) 7905 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7906 } 7907 } 7908 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,k_gt_8_subtile)7909 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_gt_8_subtile) { 7910 TEST_REQUIRES_ARM_NEON_DOT; 7911 for (size_t k = 9; k < 16; k++) { 7912 for (uint32_t n = 1; n <= 32; n++) { 7913 for (uint32_t m = 1; m <= 1; m++) { 7914 GemmMicrokernelTester() 7915 .mr(1) 7916 .nr(32) 7917 .kr(4) 7918 .sr(1) 7919 .m(m) 7920 .n(n) 7921 .k(k) 7922 .iterations(1) 7923 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7924 } 7925 } 7926 } 7927 } 7928 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,k_div_8)7929 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_div_8) { 7930 TEST_REQUIRES_ARM_NEON_DOT; 7931 for (size_t k = 16; k <= 80; k += 8) { 7932 GemmMicrokernelTester() 7933 .mr(1) 7934 .nr(32) 7935 .kr(4) 7936 .sr(1) 7937 .m(1) 7938 .n(32) 7939 .k(k) 7940 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7941 } 7942 } 7943 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,k_div_8_subtile)7944 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_div_8_subtile) { 7945 TEST_REQUIRES_ARM_NEON_DOT; 7946 for (size_t k = 16; k <= 80; k += 8) { 7947 for (uint32_t n = 1; n <= 32; n++) { 
7948 for (uint32_t m = 1; m <= 1; m++) { 7949 GemmMicrokernelTester() 7950 .mr(1) 7951 .nr(32) 7952 .kr(4) 7953 .sr(1) 7954 .m(m) 7955 .n(n) 7956 .k(k) 7957 .iterations(1) 7958 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7959 } 7960 } 7961 } 7962 } 7963 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,n_gt_32)7964 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32) { 7965 TEST_REQUIRES_ARM_NEON_DOT; 7966 for (uint32_t n = 33; n < 64; n++) { 7967 for (size_t k = 1; k <= 40; k += 9) { 7968 GemmMicrokernelTester() 7969 .mr(1) 7970 .nr(32) 7971 .kr(4) 7972 .sr(1) 7973 .m(1) 7974 .n(n) 7975 .k(k) 7976 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7977 } 7978 } 7979 } 7980 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,n_gt_32_strided_cn)7981 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32_strided_cn) { 7982 TEST_REQUIRES_ARM_NEON_DOT; 7983 for (uint32_t n = 33; n < 64; n++) { 7984 for (size_t k = 1; k <= 40; k += 9) { 7985 GemmMicrokernelTester() 7986 .mr(1) 7987 .nr(32) 7988 .kr(4) 7989 .sr(1) 7990 .m(1) 7991 .n(n) 7992 .k(k) 7993 .cn_stride(37) 7994 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 7995 } 7996 } 7997 } 7998 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,n_gt_32_subtile)7999 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32_subtile) { 8000 TEST_REQUIRES_ARM_NEON_DOT; 8001 for (uint32_t n = 33; n < 64; n++) { 8002 for (size_t k = 1; k <= 40; k += 9) { 8003 for (uint32_t m = 1; m <= 1; m++) { 8004 GemmMicrokernelTester() 8005 .mr(1) 8006 .nr(32) 8007 .kr(4) 8008 .sr(1) 8009 .m(m) 8010 .n(n) 8011 .k(k) 8012 .iterations(1) 8013 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8014 } 8015 } 8016 } 8017 } 8018 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,n_div_32)8019 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32) { 8020 TEST_REQUIRES_ARM_NEON_DOT; 8021 for (uint32_t n = 64; n <= 96; n += 32) { 8022 for (size_t k = 1; k <= 40; k += 9) { 8023 GemmMicrokernelTester() 8024 .mr(1) 8025 .nr(32) 8026 .kr(4) 8027 .sr(1) 8028 .m(1) 8029 .n(n) 8030 .k(k) 8031 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8032 } 8033 } 8034 } 8035 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,n_div_32_strided_cn)8036 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32_strided_cn) { 8037 TEST_REQUIRES_ARM_NEON_DOT; 8038 for (uint32_t n = 64; n <= 96; n += 32) { 8039 for (size_t k = 1; k <= 40; k += 9) { 8040 GemmMicrokernelTester() 8041 .mr(1) 8042 .nr(32) 8043 .kr(4) 8044 .sr(1) 8045 .m(1) 8046 .n(n) 8047 .k(k) 8048 .cn_stride(37) 8049 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8050 } 8051 } 8052 } 8053 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,n_div_32_subtile)8054 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32_subtile) { 8055 TEST_REQUIRES_ARM_NEON_DOT; 8056 for (uint32_t n = 64; n <= 96; n += 32) { 8057 for (size_t k = 1; k <= 40; k += 9) { 8058 for (uint32_t m = 1; m <= 1; m++) { 8059 GemmMicrokernelTester() 8060 .mr(1) 8061 .nr(32) 8062 .kr(4) 8063 .sr(1) 8064 .m(m) 8065 .n(n) 8066 .k(k) 8067 .iterations(1) 8068 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8069 } 8070 } 8071 } 8072 } 8073 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,small_kernel)8074 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, small_kernel) { 8075 TEST_REQUIRES_ARM_NEON_DOT; 8076 for (size_t k = 1; k <= 40; k += 9) { 8077 GemmMicrokernelTester() 8078 .mr(1) 8079 .nr(32) 8080 .kr(4) 8081 .sr(1) 8082 .m(1) 8083 .n(32) 8084 .k(k) 8085 .ks(3) 8086 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8087 } 8088 } 8089 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,small_kernel_subtile)8090 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, small_kernel_subtile) { 8091 TEST_REQUIRES_ARM_NEON_DOT; 8092 for (size_t k = 1; k <= 40; k += 9) { 8093 for (uint32_t n = 1; n <= 32; n++) { 8094 for (uint32_t m = 1; m <= 1; m++) { 8095 GemmMicrokernelTester() 8096 .mr(1) 8097 .nr(32) 8098 .kr(4) 8099 .sr(1) 8100 .m(m) 8101 .n(n) 8102 .k(k) 8103 .ks(3) 8104 .iterations(1) 8105 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8106 } 8107 } 8108 } 8109 } 8110 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,n_gt_32_small_kernel)8111 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32_small_kernel) { 8112 TEST_REQUIRES_ARM_NEON_DOT; 8113 for (uint32_t n = 33; n < 64; n++) { 8114 for (size_t k = 1; k <= 40; k += 9) { 8115 GemmMicrokernelTester() 8116 .mr(1) 8117 .nr(32) 8118 .kr(4) 8119 .sr(1) 8120 .m(1) 8121 .n(n) 8122 .k(k) 8123 .ks(3) 8124 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8125 } 8126 } 8127 } 8128 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,n_div_32_small_kernel)8129 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32_small_kernel) { 8130 TEST_REQUIRES_ARM_NEON_DOT; 8131 for (uint32_t n = 64; n <= 96; n += 32) { 8132 for (size_t k = 1; k <= 40; k += 9) { 8133 GemmMicrokernelTester() 8134 .mr(1) 8135 .nr(32) 8136 .kr(4) 8137 .sr(1) 8138 .m(1) 8139 .n(n) 8140 .k(k) 8141 .ks(3) 8142 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8143 } 8144 } 8145 } 8146 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,strided_cm_subtile)8147 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, strided_cm_subtile) { 8148 
TEST_REQUIRES_ARM_NEON_DOT; 8149 for (size_t k = 1; k <= 40; k += 9) { 8150 for (uint32_t n = 1; n <= 32; n++) { 8151 for (uint32_t m = 1; m <= 1; m++) { 8152 GemmMicrokernelTester() 8153 .mr(1) 8154 .nr(32) 8155 .kr(4) 8156 .sr(1) 8157 .m(m) 8158 .n(n) 8159 .k(k) 8160 .cm_stride(37) 8161 .iterations(1) 8162 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8163 } 8164 } 8165 } 8166 } 8167 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,a_offset)8168 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, a_offset) { 8169 TEST_REQUIRES_ARM_NEON_DOT; 8170 for (size_t k = 1; k <= 40; k += 9) { 8171 GemmMicrokernelTester() 8172 .mr(1) 8173 .nr(32) 8174 .kr(4) 8175 .sr(1) 8176 .m(1) 8177 .n(32) 8178 .k(k) 8179 .ks(3) 8180 .a_offset(43) 8181 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8182 } 8183 } 8184 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,zero)8185 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, zero) { 8186 TEST_REQUIRES_ARM_NEON_DOT; 8187 for (size_t k = 1; k <= 40; k += 9) { 8188 for (uint32_t mz = 0; mz < 1; mz++) { 8189 GemmMicrokernelTester() 8190 .mr(1) 8191 .nr(32) 8192 .kr(4) 8193 .sr(1) 8194 .m(1) 8195 .n(32) 8196 .k(k) 8197 .ks(3) 8198 .a_offset(43) 8199 .zero_index(mz) 8200 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8201 } 8202 } 8203 } 8204 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,qmin)8205 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, qmin) { 8206 TEST_REQUIRES_ARM_NEON_DOT; 8207 GemmMicrokernelTester() 8208 .mr(1) 8209 .nr(32) 8210 .kr(4) 8211 .sr(1) 8212 .m(1) 8213 .n(32) 8214 .k(8) 8215 .qmin(128) 8216 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8217 } 8218 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,qmax)8219 
TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, qmax) { 8220 TEST_REQUIRES_ARM_NEON_DOT; 8221 GemmMicrokernelTester() 8222 .mr(1) 8223 .nr(32) 8224 .kr(4) 8225 .sr(1) 8226 .m(1) 8227 .n(32) 8228 .k(8) 8229 .qmax(128) 8230 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8231 } 8232 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,strided_cm)8233 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, strided_cm) { 8234 TEST_REQUIRES_ARM_NEON_DOT; 8235 GemmMicrokernelTester() 8236 .mr(1) 8237 .nr(32) 8238 .kr(4) 8239 .sr(1) 8240 .m(1) 8241 .n(32) 8242 .k(8) 8243 .cm_stride(37) 8244 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8245 } 8246 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,no_a_zero_point)8247 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, no_a_zero_point) { 8248 TEST_REQUIRES_ARM_NEON_DOT; 8249 for (size_t k = 1; k <= 40; k += 9) { 8250 GemmMicrokernelTester() 8251 .mr(1) 8252 .nr(32) 8253 .kr(4) 8254 .sr(1) 8255 .m(1) 8256 .n(32) 8257 .k(k) 8258 .a_zero_point(0) 8259 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8260 } 8261 } 8262 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,no_b_zero_point)8263 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, no_b_zero_point) { 8264 TEST_REQUIRES_ARM_NEON_DOT; 8265 for (size_t k = 1; k <= 40; k += 9) { 8266 GemmMicrokernelTester() 8267 .mr(1) 8268 .nr(32) 8269 .kr(4) 8270 .sr(1) 8271 .m(1) 8272 .n(32) 8273 .k(k) 8274 .b_zero_point(0) 8275 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8276 } 8277 } 8278 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT,no_zero_point)8279 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, no_zero_point) { 8280 TEST_REQUIRES_ARM_NEON_DOT; 8281 for (size_t k = 1; k <= 40; k += 9) { 8282 
GemmMicrokernelTester() 8283 .mr(1) 8284 .nr(32) 8285 .kr(4) 8286 .sr(1) 8287 .m(1) 8288 .n(32) 8289 .k(k) 8290 .a_zero_point(0) 8291 .b_zero_point(0) 8292 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8293 } 8294 } 8295 #endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 8296 8297 8298 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,k_eq_8)8299 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8) { 8300 TEST_REQUIRES_ARM_NEON; 8301 GemmMicrokernelTester() 8302 .mr(4) 8303 .nr(16) 8304 .kr(1) 8305 .sr(1) 8306 .m(4) 8307 .n(16) 8308 .k(8) 8309 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8310 } 8311 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,strided_cn)8312 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cn) { 8313 TEST_REQUIRES_ARM_NEON; 8314 GemmMicrokernelTester() 8315 .mr(4) 8316 .nr(16) 8317 .kr(1) 8318 .sr(1) 8319 .m(4) 8320 .n(16) 8321 .k(8) 8322 .cn_stride(19) 8323 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8324 } 8325 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,k_eq_8_subtile)8326 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile) { 8327 TEST_REQUIRES_ARM_NEON; 8328 for (uint32_t n = 1; n <= 16; n++) { 8329 for (uint32_t m = 1; m <= 4; m++) { 8330 GemmMicrokernelTester() 8331 .mr(4) 8332 .nr(16) 8333 .kr(1) 8334 .sr(1) 8335 .m(m) 8336 .n(n) 8337 .k(8) 8338 .iterations(1) 8339 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8340 } 8341 } 8342 } 8343 
// Sweeps m = 1..4 at n = 16, k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m).n(16).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}

// Sweeps n = 1..16 at m = 4, k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n).k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}

// Full tile for k = 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile for k = 1..7.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full tile for k = 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile for k = 9..15.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
                  xnn_init_qu8_conv_minmax_rndnu_neon_params,
                  xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// Full tile for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
              xnn_init_qu8_conv_minmax_rndnu_neon_params,
              xnn_qu8_requantize_rndnu);
  }
}
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,k_div_8_subtile)8461 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_div_8_subtile) { 8462 TEST_REQUIRES_ARM_NEON; 8463 for (size_t k = 16; k <= 80; k += 8) { 8464 for (uint32_t n = 1; n <= 16; n++) { 8465 for (uint32_t m = 1; m <= 4; m++) { 8466 GemmMicrokernelTester() 8467 .mr(4) 8468 .nr(16) 8469 .kr(1) 8470 .sr(1) 8471 .m(m) 8472 .n(n) 8473 .k(k) 8474 .iterations(1) 8475 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8476 } 8477 } 8478 } 8479 } 8480 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,n_gt_16)8481 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16) { 8482 TEST_REQUIRES_ARM_NEON; 8483 for (uint32_t n = 17; n < 32; n++) { 8484 for (size_t k = 1; k <= 40; k += 9) { 8485 GemmMicrokernelTester() 8486 .mr(4) 8487 .nr(16) 8488 .kr(1) 8489 .sr(1) 8490 .m(4) 8491 .n(n) 8492 .k(k) 8493 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8494 } 8495 } 8496 } 8497 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,n_gt_16_strided_cn)8498 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_strided_cn) { 8499 TEST_REQUIRES_ARM_NEON; 8500 for (uint32_t n = 17; n < 32; n++) { 8501 for (size_t k = 1; k <= 40; k += 9) { 8502 GemmMicrokernelTester() 8503 .mr(4) 8504 .nr(16) 8505 .kr(1) 8506 .sr(1) 8507 .m(4) 8508 .n(n) 8509 .k(k) 8510 .cn_stride(19) 8511 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8512 } 8513 } 8514 } 8515 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,n_gt_16_subtile)8516 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, 
n_gt_16_subtile) { 8517 TEST_REQUIRES_ARM_NEON; 8518 for (uint32_t n = 17; n < 32; n++) { 8519 for (size_t k = 1; k <= 40; k += 9) { 8520 for (uint32_t m = 1; m <= 4; m++) { 8521 GemmMicrokernelTester() 8522 .mr(4) 8523 .nr(16) 8524 .kr(1) 8525 .sr(1) 8526 .m(m) 8527 .n(n) 8528 .k(k) 8529 .iterations(1) 8530 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8531 } 8532 } 8533 } 8534 } 8535 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,n_div_16)8536 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16) { 8537 TEST_REQUIRES_ARM_NEON; 8538 for (uint32_t n = 32; n <= 48; n += 16) { 8539 for (size_t k = 1; k <= 40; k += 9) { 8540 GemmMicrokernelTester() 8541 .mr(4) 8542 .nr(16) 8543 .kr(1) 8544 .sr(1) 8545 .m(4) 8546 .n(n) 8547 .k(k) 8548 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8549 } 8550 } 8551 } 8552 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,n_div_16_strided_cn)8553 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_strided_cn) { 8554 TEST_REQUIRES_ARM_NEON; 8555 for (uint32_t n = 32; n <= 48; n += 16) { 8556 for (size_t k = 1; k <= 40; k += 9) { 8557 GemmMicrokernelTester() 8558 .mr(4) 8559 .nr(16) 8560 .kr(1) 8561 .sr(1) 8562 .m(4) 8563 .n(n) 8564 .k(k) 8565 .cn_stride(19) 8566 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8567 } 8568 } 8569 } 8570 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,n_div_16_subtile)8571 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_subtile) { 8572 TEST_REQUIRES_ARM_NEON; 8573 for (uint32_t n = 32; n <= 48; n += 16) { 8574 for (size_t k = 1; k <= 40; k += 9) { 8575 for 
(uint32_t m = 1; m <= 4; m++) { 8576 GemmMicrokernelTester() 8577 .mr(4) 8578 .nr(16) 8579 .kr(1) 8580 .sr(1) 8581 .m(m) 8582 .n(n) 8583 .k(k) 8584 .iterations(1) 8585 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8586 } 8587 } 8588 } 8589 } 8590 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,small_kernel)8591 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, small_kernel) { 8592 TEST_REQUIRES_ARM_NEON; 8593 for (size_t k = 1; k <= 40; k += 9) { 8594 GemmMicrokernelTester() 8595 .mr(4) 8596 .nr(16) 8597 .kr(1) 8598 .sr(1) 8599 .m(4) 8600 .n(16) 8601 .k(k) 8602 .ks(3) 8603 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8604 } 8605 } 8606 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,small_kernel_subtile)8607 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, small_kernel_subtile) { 8608 TEST_REQUIRES_ARM_NEON; 8609 for (size_t k = 1; k <= 40; k += 9) { 8610 for (uint32_t n = 1; n <= 16; n++) { 8611 for (uint32_t m = 1; m <= 4; m++) { 8612 GemmMicrokernelTester() 8613 .mr(4) 8614 .nr(16) 8615 .kr(1) 8616 .sr(1) 8617 .m(m) 8618 .n(n) 8619 .k(k) 8620 .ks(3) 8621 .iterations(1) 8622 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8623 } 8624 } 8625 } 8626 } 8627 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,n_gt_16_small_kernel)8628 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_small_kernel) { 8629 TEST_REQUIRES_ARM_NEON; 8630 for (uint32_t n = 17; n < 32; n++) { 8631 for (size_t k = 1; k <= 40; k += 9) { 8632 GemmMicrokernelTester() 8633 .mr(4) 8634 .nr(16) 8635 .kr(1) 8636 .sr(1) 8637 .m(4) 8638 .n(n) 8639 .k(k) 8640 .ks(3) 
8641 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8642 } 8643 } 8644 } 8645 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,n_div_16_small_kernel)8646 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_small_kernel) { 8647 TEST_REQUIRES_ARM_NEON; 8648 for (uint32_t n = 32; n <= 48; n += 16) { 8649 for (size_t k = 1; k <= 40; k += 9) { 8650 GemmMicrokernelTester() 8651 .mr(4) 8652 .nr(16) 8653 .kr(1) 8654 .sr(1) 8655 .m(4) 8656 .n(n) 8657 .k(k) 8658 .ks(3) 8659 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8660 } 8661 } 8662 } 8663 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,strided_cm_subtile)8664 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cm_subtile) { 8665 TEST_REQUIRES_ARM_NEON; 8666 for (size_t k = 1; k <= 40; k += 9) { 8667 for (uint32_t n = 1; n <= 16; n++) { 8668 for (uint32_t m = 1; m <= 4; m++) { 8669 GemmMicrokernelTester() 8670 .mr(4) 8671 .nr(16) 8672 .kr(1) 8673 .sr(1) 8674 .m(m) 8675 .n(n) 8676 .k(k) 8677 .cm_stride(19) 8678 .iterations(1) 8679 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8680 } 8681 } 8682 } 8683 } 8684 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,a_offset)8685 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, a_offset) { 8686 TEST_REQUIRES_ARM_NEON; 8687 for (size_t k = 1; k <= 40; k += 9) { 8688 GemmMicrokernelTester() 8689 .mr(4) 8690 .nr(16) 8691 .kr(1) 8692 .sr(1) 8693 .m(4) 8694 .n(16) 8695 .k(k) 8696 .ks(3) 8697 .a_offset(163) 8698 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 8699 } 8700 } 8701 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,zero)8702 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, zero) { 8703 TEST_REQUIRES_ARM_NEON; 8704 for (size_t k = 1; k <= 40; k += 9) { 8705 for (uint32_t mz = 0; mz < 4; mz++) { 8706 GemmMicrokernelTester() 8707 .mr(4) 8708 .nr(16) 8709 .kr(1) 8710 .sr(1) 8711 .m(4) 8712 .n(16) 8713 .k(k) 8714 .ks(3) 8715 .a_offset(163) 8716 .zero_index(mz) 8717 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8718 } 8719 } 8720 } 8721 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,qmin)8722 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, qmin) { 8723 TEST_REQUIRES_ARM_NEON; 8724 GemmMicrokernelTester() 8725 .mr(4) 8726 .nr(16) 8727 .kr(1) 8728 .sr(1) 8729 .m(4) 8730 .n(16) 8731 .k(8) 8732 .qmin(128) 8733 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8734 } 8735 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,qmax)8736 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, qmax) { 8737 TEST_REQUIRES_ARM_NEON; 8738 GemmMicrokernelTester() 8739 .mr(4) 8740 .nr(16) 8741 .kr(1) 8742 .sr(1) 8743 .m(4) 8744 .n(16) 8745 .k(8) 8746 .qmax(128) 8747 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8748 } 8749 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,strided_cm)8750 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cm) { 8751 TEST_REQUIRES_ARM_NEON; 8752 GemmMicrokernelTester() 8753 .mr(4) 8754 .nr(16) 8755 .kr(1) 8756 .sr(1) 8757 .m(4) 8758 .n(16) 8759 .k(8) 8760 .cm_stride(19) 8761 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8762 } 8763 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,no_a_zero_point)8764 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_a_zero_point) { 8765 TEST_REQUIRES_ARM_NEON; 8766 for (size_t k = 1; k <= 40; k += 9) { 8767 GemmMicrokernelTester() 8768 .mr(4) 8769 .nr(16) 8770 .kr(1) 8771 .sr(1) 8772 .m(4) 8773 .n(16) 8774 .k(k) 8775 .a_zero_point(0) 8776 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8777 } 8778 } 8779 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,no_b_zero_point)8780 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_b_zero_point) { 8781 TEST_REQUIRES_ARM_NEON; 8782 for (size_t k = 1; k <= 40; k += 9) { 8783 GemmMicrokernelTester() 8784 .mr(4) 8785 .nr(16) 8786 .kr(1) 8787 .sr(1) 8788 .m(4) 8789 .n(16) 8790 .k(k) 8791 .b_zero_point(0) 8792 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8793 } 8794 } 8795 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75,no_zero_point)8796 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_zero_point) { 8797 TEST_REQUIRES_ARM_NEON; 8798 for (size_t k = 1; k <= 40; k += 9) { 8799 GemmMicrokernelTester() 8800 .mr(4) 8801 .nr(16) 8802 .kr(1) 8803 .sr(1) 8804 .m(4) 8805 .n(16) 8806 .k(k) 8807 .a_zero_point(0) 8808 .b_zero_point(0) 8809 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 8810 } 8811 } 8812 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 8813 8814 8815 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 
// Single run on a 4x16 problem with k = 8.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// 4x16 problem with k = 8 and cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// k = 8; every (m, n) with m in [1, 4] and n in [1, 16], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// k = 8, n = 16; m in [1, 4], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 8, m = 4; n in [1, 16], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 4x16 problem for every k in [1, 7].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k in [1, 7]; every (m, n) with m in [1, 4] and n in [1, 16], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
// 4x16 problem for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k in [9, 15]; every (m, n) with m in [1, 4] and n in [1, 16], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 4x16 problem for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80; every (m, n) with m in [1, 4] and n in [1, 16], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4; n in [17, 31] and k in {1, 10, ..., 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m = 4; n in [17, 31] and k in {1, 10, ..., 37}, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in [17, 31], k in {1, 10, ..., 37}, m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4; n in {32, 48} and k in {1, 10, ..., 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m = 4; n in {32, 48} and k in {1, 10, ..., 37}, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in {32, 48}, k in {1, 10, ..., 37}, m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 4x16 problem with ks = 3 for k in {1, 10, ..., 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// ks = 3; k in {1, 10, ..., 37}, n in [1, 16], m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// ks = 3, m = 4; n in [17, 31] and k in {1, 10, ..., 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// ks = 3, m = 4; n in {32, 48} and k in {1, 10, ..., 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// cm_stride = 19; k in {1, 10, ..., 37}, n in [1, 16], m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 4x16 problem with ks = 3 and a_offset = 163 for k in {1, 10, ..., 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53,zero)9219 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) { 9220 TEST_REQUIRES_ARM_NEON; 9221 for (size_t k = 1; k <= 40; k += 9) { 9222 for (uint32_t mz = 0; mz < 4; mz++) { 9223 GemmMicrokernelTester() 9224 .mr(4) 9225 .nr(16) 9226 .kr(1) 9227 .sr(1) 9228 .m(4) 9229 .n(16) 9230 .k(k) 9231 .ks(3) 9232 .a_offset(163) 9233 .zero_index(mz) 9234 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9235 } 9236 } 9237 } 9238 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53,qmin)9239 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) { 9240 TEST_REQUIRES_ARM_NEON; 9241 GemmMicrokernelTester() 9242 .mr(4) 9243 .nr(16) 9244 .kr(1) 9245 .sr(1) 9246 .m(4) 9247 .n(16) 9248 .k(8) 9249 .qmin(128) 9250 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9251 } 9252 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53,qmax)9253 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) { 9254 TEST_REQUIRES_ARM_NEON; 9255 GemmMicrokernelTester() 9256 .mr(4) 9257 .nr(16) 9258 .kr(1) 9259 .sr(1) 9260 .m(4) 9261 .n(16) 9262 .k(8) 9263 .qmax(128) 9264 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9265 } 9266 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53,strided_cm)9267 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) { 9268 TEST_REQUIRES_ARM_NEON; 9269 GemmMicrokernelTester() 9270 .mr(4) 9271 .nr(16) 9272 .kr(1) 9273 .sr(1) 9274 .m(4) 9275 .n(16) 9276 .k(8) 9277 .cm_stride(19) 9278 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9279 } 9280 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53,no_a_zero_point)9281 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_a_zero_point) { 9282 TEST_REQUIRES_ARM_NEON; 9283 for (size_t k = 1; k <= 40; k += 9) { 9284 GemmMicrokernelTester() 9285 .mr(4) 9286 .nr(16) 9287 .kr(1) 9288 .sr(1) 9289 .m(4) 9290 .n(16) 9291 .k(k) 9292 .a_zero_point(0) 9293 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9294 } 9295 } 9296 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53,no_b_zero_point)9297 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_b_zero_point) { 9298 TEST_REQUIRES_ARM_NEON; 9299 for (size_t k = 1; k <= 40; k += 9) { 9300 GemmMicrokernelTester() 9301 .mr(4) 9302 .nr(16) 9303 .kr(1) 9304 .sr(1) 9305 .m(4) 9306 .n(16) 9307 .k(k) 9308 .b_zero_point(0) 9309 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9310 } 9311 } 9312 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53,no_zero_point)9313 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_zero_point) { 9314 TEST_REQUIRES_ARM_NEON; 9315 for (size_t k = 1; k <= 40; k += 9) { 9316 GemmMicrokernelTester() 9317 .mr(4) 9318 .nr(16) 9319 .kr(1) 9320 .sr(1) 9321 .m(4) 9322 .n(16) 9323 .k(k) 9324 .a_zero_point(0) 9325 .b_zero_point(0) 9326 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9327 } 9328 } 9329 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 9330 9331 9332 #if XNN_ARCH_ARM64 && 
XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_eq_8)9333 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) { 9334 TEST_REQUIRES_ARM_NEON; 9335 GemmMicrokernelTester() 9336 .mr(4) 9337 .nr(16) 9338 .kr(1) 9339 .sr(1) 9340 .m(4) 9341 .n(16) 9342 .k(8) 9343 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9344 } 9345 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,strided_cn)9346 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) { 9347 TEST_REQUIRES_ARM_NEON; 9348 GemmMicrokernelTester() 9349 .mr(4) 9350 .nr(16) 9351 .kr(1) 9352 .sr(1) 9353 .m(4) 9354 .n(16) 9355 .k(8) 9356 .cn_stride(19) 9357 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9358 } 9359 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile)9360 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) { 9361 TEST_REQUIRES_ARM_NEON; 9362 for (uint32_t n = 1; n <= 16; n++) { 9363 for (uint32_t m = 1; m <= 4; m++) { 9364 GemmMicrokernelTester() 9365 .mr(4) 9366 .nr(16) 9367 .kr(1) 9368 .sr(1) 9369 .m(m) 9370 .n(n) 9371 .k(8) 9372 .iterations(1) 9373 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9374 } 9375 } 9376 } 9377 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile_m)9378 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) { 9379 TEST_REQUIRES_ARM_NEON; 9380 for (uint32_t m = 1; m <= 4; m++) { 9381 GemmMicrokernelTester() 9382 .mr(4) 9383 .nr(16) 9384 .kr(1) 9385 .sr(1) 9386 .m(m) 9387 .n(16) 9388 .k(8) 9389 .iterations(1) 9390 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9391 } 9392 } 9393 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile_n)9394 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) { 9395 TEST_REQUIRES_ARM_NEON; 9396 for (uint32_t n = 1; n <= 16; n++) { 9397 GemmMicrokernelTester() 9398 .mr(4) 9399 .nr(16) 9400 .kr(1) 9401 .sr(1) 9402 .m(4) 9403 .n(n) 9404 .k(8) 9405 .iterations(1) 9406 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9407 } 9408 } 9409 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_lt_8)9410 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) { 9411 TEST_REQUIRES_ARM_NEON; 9412 for (size_t k = 1; k < 8; k++) { 9413 GemmMicrokernelTester() 9414 .mr(4) 9415 .nr(16) 9416 .kr(1) 9417 .sr(1) 9418 .m(4) 9419 .n(16) 9420 .k(k) 9421 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9422 } 9423 } 9424 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_lt_8_subtile)9425 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) { 9426 TEST_REQUIRES_ARM_NEON; 9427 for (size_t k = 1; k < 8; k++) { 9428 for (uint32_t n = 1; n <= 16; n++) { 9429 for (uint32_t m = 1; m <= 4; m++) { 9430 GemmMicrokernelTester() 9431 .mr(4) 9432 .nr(16) 9433 .kr(1) 9434 .sr(1) 9435 .m(m) 9436 .n(n) 9437 .k(k) 9438 .iterations(1) 9439 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9440 } 9441 } 9442 } 9443 } 9444 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_gt_8)9445 
// 4x16 problem for every k in [9, 15].
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k in [9, 15]; every (m, n) with m in [1, 4] and n in [1, 16], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// 4x16 problem for k = 16, 24, ..., 80.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80; every (m, n) with m in [1, 4] and n in [1, 16], one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4; n in [17, 31] and k in {1, 10, ..., 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m = 4; n in [17, 31] and k in {1, 10, ..., 37}, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in [17, 31], k in {1, 10, ..., 37}, m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}

// m = 4; n in {32, 48} and k in {1, 10, ..., 37}.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// m = 4; n in {32, 48} and k in {1, 10, ..., 37}, with cn_stride set to 19.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// n in {32, 48}, k in {1, 10, ..., 37}, m in [1, 4]; one iteration each.
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
      }
    }
  }
}
TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,small_kernel)9625 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) { 9626 TEST_REQUIRES_ARM_NEON; 9627 for (size_t k = 1; k <= 40; k += 9) { 9628 GemmMicrokernelTester() 9629 .mr(4) 9630 .nr(16) 9631 .kr(1) 9632 .sr(1) 9633 .m(4) 9634 .n(16) 9635 .k(k) 9636 .ks(3) 9637 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9638 } 9639 } 9640 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,small_kernel_subtile)9641 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) { 9642 TEST_REQUIRES_ARM_NEON; 9643 for (size_t k = 1; k <= 40; k += 9) { 9644 for (uint32_t n = 1; n <= 16; n++) { 9645 for (uint32_t m = 1; m <= 4; m++) { 9646 GemmMicrokernelTester() 9647 .mr(4) 9648 .nr(16) 9649 .kr(1) 9650 .sr(1) 9651 .m(m) 9652 .n(n) 9653 .k(k) 9654 .ks(3) 9655 .iterations(1) 9656 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9657 } 9658 } 9659 } 9660 } 9661 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,n_gt_16_small_kernel)9662 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_small_kernel) { 9663 TEST_REQUIRES_ARM_NEON; 9664 for (uint32_t n = 17; n < 32; n++) { 9665 for (size_t k = 1; k <= 40; k += 9) { 9666 GemmMicrokernelTester() 9667 .mr(4) 9668 .nr(16) 9669 .kr(1) 9670 .sr(1) 9671 .m(4) 9672 .n(n) 9673 .k(k) 9674 .ks(3) 9675 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9676 } 9677 } 9678 } 9679 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,n_div_16_small_kernel)9680 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, 
n_div_16_small_kernel) { 9681 TEST_REQUIRES_ARM_NEON; 9682 for (uint32_t n = 32; n <= 48; n += 16) { 9683 for (size_t k = 1; k <= 40; k += 9) { 9684 GemmMicrokernelTester() 9685 .mr(4) 9686 .nr(16) 9687 .kr(1) 9688 .sr(1) 9689 .m(4) 9690 .n(n) 9691 .k(k) 9692 .ks(3) 9693 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9694 } 9695 } 9696 } 9697 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,strided_cm_subtile)9698 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) { 9699 TEST_REQUIRES_ARM_NEON; 9700 for (size_t k = 1; k <= 40; k += 9) { 9701 for (uint32_t n = 1; n <= 16; n++) { 9702 for (uint32_t m = 1; m <= 4; m++) { 9703 GemmMicrokernelTester() 9704 .mr(4) 9705 .nr(16) 9706 .kr(1) 9707 .sr(1) 9708 .m(m) 9709 .n(n) 9710 .k(k) 9711 .cm_stride(19) 9712 .iterations(1) 9713 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9714 } 9715 } 9716 } 9717 } 9718 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,a_offset)9719 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) { 9720 TEST_REQUIRES_ARM_NEON; 9721 for (size_t k = 1; k <= 40; k += 9) { 9722 GemmMicrokernelTester() 9723 .mr(4) 9724 .nr(16) 9725 .kr(1) 9726 .sr(1) 9727 .m(4) 9728 .n(16) 9729 .k(k) 9730 .ks(3) 9731 .a_offset(163) 9732 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9733 } 9734 } 9735 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,zero)9736 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) { 9737 TEST_REQUIRES_ARM_NEON; 9738 for (size_t k = 1; k <= 40; k += 9) { 9739 for (uint32_t mz = 0; mz < 4; mz++) { 9740 GemmMicrokernelTester() 9741 .mr(4) 9742 
.nr(16) 9743 .kr(1) 9744 .sr(1) 9745 .m(4) 9746 .n(16) 9747 .k(k) 9748 .ks(3) 9749 .a_offset(163) 9750 .zero_index(mz) 9751 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9752 } 9753 } 9754 } 9755 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,qmin)9756 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) { 9757 TEST_REQUIRES_ARM_NEON; 9758 GemmMicrokernelTester() 9759 .mr(4) 9760 .nr(16) 9761 .kr(1) 9762 .sr(1) 9763 .m(4) 9764 .n(16) 9765 .k(8) 9766 .qmin(128) 9767 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9768 } 9769 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,qmax)9770 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) { 9771 TEST_REQUIRES_ARM_NEON; 9772 GemmMicrokernelTester() 9773 .mr(4) 9774 .nr(16) 9775 .kr(1) 9776 .sr(1) 9777 .m(4) 9778 .n(16) 9779 .k(8) 9780 .qmax(128) 9781 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9782 } 9783 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,strided_cm)9784 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) { 9785 TEST_REQUIRES_ARM_NEON; 9786 GemmMicrokernelTester() 9787 .mr(4) 9788 .nr(16) 9789 .kr(1) 9790 .sr(1) 9791 .m(4) 9792 .n(16) 9793 .k(8) 9794 .cm_stride(19) 9795 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9796 } 9797 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,no_a_zero_point)9798 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_a_zero_point) { 9799 TEST_REQUIRES_ARM_NEON; 9800 for (size_t k = 1; k <= 40; k += 9) 
{ 9801 GemmMicrokernelTester() 9802 .mr(4) 9803 .nr(16) 9804 .kr(1) 9805 .sr(1) 9806 .m(4) 9807 .n(16) 9808 .k(k) 9809 .a_zero_point(0) 9810 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9811 } 9812 } 9813 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,no_b_zero_point)9814 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_b_zero_point) { 9815 TEST_REQUIRES_ARM_NEON; 9816 for (size_t k = 1; k <= 40; k += 9) { 9817 GemmMicrokernelTester() 9818 .mr(4) 9819 .nr(16) 9820 .kr(1) 9821 .sr(1) 9822 .m(4) 9823 .n(16) 9824 .k(k) 9825 .b_zero_point(0) 9826 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9827 } 9828 } 9829 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,no_zero_point)9830 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_zero_point) { 9831 TEST_REQUIRES_ARM_NEON; 9832 for (size_t k = 1; k <= 40; k += 9) { 9833 GemmMicrokernelTester() 9834 .mr(4) 9835 .nr(16) 9836 .kr(1) 9837 .sr(1) 9838 .m(4) 9839 .n(16) 9840 .k(k) 9841 .a_zero_point(0) 9842 .b_zero_point(0) 9843 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9844 } 9845 } 9846 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 9847 9848 9849 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,k_eq_8)9850 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) { 9851 TEST_REQUIRES_ARM_NEON; 9852 GemmMicrokernelTester() 9853 .mr(4) 9854 .nr(16) 9855 .kr(1) 9856 .sr(1) 9857 .m(4) 9858 .n(16) 9859 .k(8) 9860 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9861 } 
9862 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,strided_cn)9863 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) { 9864 TEST_REQUIRES_ARM_NEON; 9865 GemmMicrokernelTester() 9866 .mr(4) 9867 .nr(16) 9868 .kr(1) 9869 .sr(1) 9870 .m(4) 9871 .n(16) 9872 .k(8) 9873 .cn_stride(19) 9874 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9875 } 9876 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,k_eq_8_subtile)9877 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) { 9878 TEST_REQUIRES_ARM_NEON; 9879 for (uint32_t n = 1; n <= 16; n++) { 9880 for (uint32_t m = 1; m <= 4; m++) { 9881 GemmMicrokernelTester() 9882 .mr(4) 9883 .nr(16) 9884 .kr(1) 9885 .sr(1) 9886 .m(m) 9887 .n(n) 9888 .k(8) 9889 .iterations(1) 9890 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9891 } 9892 } 9893 } 9894 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,k_eq_8_subtile_m)9895 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) { 9896 TEST_REQUIRES_ARM_NEON; 9897 for (uint32_t m = 1; m <= 4; m++) { 9898 GemmMicrokernelTester() 9899 .mr(4) 9900 .nr(16) 9901 .kr(1) 9902 .sr(1) 9903 .m(m) 9904 .n(16) 9905 .k(8) 9906 .iterations(1) 9907 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9908 } 9909 } 9910 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,k_eq_8_subtile_n)9911 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) { 9912 TEST_REQUIRES_ARM_NEON; 9913 for (uint32_t n = 1; n <= 16; n++) { 9914 GemmMicrokernelTester() 9915 .mr(4) 9916 .nr(16) 9917 .kr(1) 9918 .sr(1) 9919 .m(4) 9920 .n(n) 9921 .k(8) 9922 .iterations(1) 9923 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9924 } 9925 } 9926 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,k_lt_8)9927 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) { 9928 TEST_REQUIRES_ARM_NEON; 9929 for (size_t k = 1; k < 8; k++) { 9930 GemmMicrokernelTester() 9931 .mr(4) 9932 .nr(16) 9933 .kr(1) 9934 .sr(1) 9935 .m(4) 9936 .n(16) 9937 .k(k) 9938 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9939 } 9940 } 9941 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,k_lt_8_subtile)9942 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) { 9943 TEST_REQUIRES_ARM_NEON; 9944 for (size_t k = 1; k < 8; k++) { 9945 for (uint32_t n = 1; n <= 16; n++) { 9946 for (uint32_t m = 1; m <= 4; m++) { 9947 GemmMicrokernelTester() 9948 .mr(4) 9949 .nr(16) 9950 .kr(1) 9951 .sr(1) 9952 .m(m) 9953 .n(n) 9954 .k(k) 9955 .iterations(1) 9956 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9957 } 9958 } 9959 } 9960 } 9961 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,k_gt_8)9962 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) { 9963 TEST_REQUIRES_ARM_NEON; 9964 for (size_t k = 9; k < 16; k++) { 9965 GemmMicrokernelTester() 9966 .mr(4) 9967 .nr(16) 9968 .kr(1) 9969 .sr(1) 9970 .m(4) 9971 .n(16) 9972 .k(k) 9973 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9974 } 9975 } 9976 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,k_gt_8_subtile)9977 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) { 9978 TEST_REQUIRES_ARM_NEON; 9979 for (size_t 
k = 9; k < 16; k++) { 9980 for (uint32_t n = 1; n <= 16; n++) { 9981 for (uint32_t m = 1; m <= 4; m++) { 9982 GemmMicrokernelTester() 9983 .mr(4) 9984 .nr(16) 9985 .kr(1) 9986 .sr(1) 9987 .m(m) 9988 .n(n) 9989 .k(k) 9990 .iterations(1) 9991 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 9992 } 9993 } 9994 } 9995 } 9996 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,k_div_8)9997 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) { 9998 TEST_REQUIRES_ARM_NEON; 9999 for (size_t k = 16; k <= 80; k += 8) { 10000 GemmMicrokernelTester() 10001 .mr(4) 10002 .nr(16) 10003 .kr(1) 10004 .sr(1) 10005 .m(4) 10006 .n(16) 10007 .k(k) 10008 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10009 } 10010 } 10011 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,k_div_8_subtile)10012 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) { 10013 TEST_REQUIRES_ARM_NEON; 10014 for (size_t k = 16; k <= 80; k += 8) { 10015 for (uint32_t n = 1; n <= 16; n++) { 10016 for (uint32_t m = 1; m <= 4; m++) { 10017 GemmMicrokernelTester() 10018 .mr(4) 10019 .nr(16) 10020 .kr(1) 10021 .sr(1) 10022 .m(m) 10023 .n(n) 10024 .k(k) 10025 .iterations(1) 10026 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10027 } 10028 } 10029 } 10030 } 10031 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,n_gt_16)10032 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) { 10033 TEST_REQUIRES_ARM_NEON; 10034 for (uint32_t n = 17; n < 32; n++) { 10035 for (size_t k = 1; k <= 40; k += 9) { 10036 GemmMicrokernelTester() 10037 .mr(4) 10038 .nr(16) 10039 .kr(1) 10040 .sr(1) 10041 .m(4) 10042 .n(n) 10043 .k(k) 10044 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10045 } 10046 } 10047 } 10048 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,n_gt_16_strided_cn)10049 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) { 10050 TEST_REQUIRES_ARM_NEON; 10051 for (uint32_t n = 17; n < 32; n++) { 10052 for (size_t k = 1; k <= 40; k += 9) { 10053 GemmMicrokernelTester() 10054 .mr(4) 10055 .nr(16) 10056 .kr(1) 10057 .sr(1) 10058 .m(4) 10059 .n(n) 10060 .k(k) 10061 .cn_stride(19) 10062 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10063 } 10064 } 10065 } 10066 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,n_gt_16_subtile)10067 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) { 10068 TEST_REQUIRES_ARM_NEON; 10069 for (uint32_t n = 17; n < 32; n++) { 10070 for (size_t k = 1; k <= 40; k += 9) { 10071 for (uint32_t m = 1; m <= 4; m++) { 10072 GemmMicrokernelTester() 10073 .mr(4) 10074 .nr(16) 10075 .kr(1) 10076 .sr(1) 10077 .m(m) 10078 .n(n) 10079 .k(k) 10080 .iterations(1) 10081 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10082 } 10083 } 10084 } 10085 } 10086 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,n_div_16)10087 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) { 10088 TEST_REQUIRES_ARM_NEON; 10089 for (uint32_t n = 32; n <= 48; n += 16) { 10090 for (size_t k = 1; k <= 40; k += 9) { 10091 GemmMicrokernelTester() 10092 .mr(4) 10093 .nr(16) 10094 .kr(1) 10095 .sr(1) 10096 .m(4) 10097 .n(n) 10098 .k(k) 10099 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10100 } 
10101 } 10102 } 10103 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,n_div_16_strided_cn)10104 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) { 10105 TEST_REQUIRES_ARM_NEON; 10106 for (uint32_t n = 32; n <= 48; n += 16) { 10107 for (size_t k = 1; k <= 40; k += 9) { 10108 GemmMicrokernelTester() 10109 .mr(4) 10110 .nr(16) 10111 .kr(1) 10112 .sr(1) 10113 .m(4) 10114 .n(n) 10115 .k(k) 10116 .cn_stride(19) 10117 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10118 } 10119 } 10120 } 10121 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,n_div_16_subtile)10122 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) { 10123 TEST_REQUIRES_ARM_NEON; 10124 for (uint32_t n = 32; n <= 48; n += 16) { 10125 for (size_t k = 1; k <= 40; k += 9) { 10126 for (uint32_t m = 1; m <= 4; m++) { 10127 GemmMicrokernelTester() 10128 .mr(4) 10129 .nr(16) 10130 .kr(1) 10131 .sr(1) 10132 .m(m) 10133 .n(n) 10134 .k(k) 10135 .iterations(1) 10136 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10137 } 10138 } 10139 } 10140 } 10141 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,small_kernel)10142 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel) { 10143 TEST_REQUIRES_ARM_NEON; 10144 for (size_t k = 1; k <= 40; k += 9) { 10145 GemmMicrokernelTester() 10146 .mr(4) 10147 .nr(16) 10148 .kr(1) 10149 .sr(1) 10150 .m(4) 10151 .n(16) 10152 .k(k) 10153 .ks(3) 10154 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10155 } 10156 } 10157 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,small_kernel_subtile)10158 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, 
small_kernel_subtile) { 10159 TEST_REQUIRES_ARM_NEON; 10160 for (size_t k = 1; k <= 40; k += 9) { 10161 for (uint32_t n = 1; n <= 16; n++) { 10162 for (uint32_t m = 1; m <= 4; m++) { 10163 GemmMicrokernelTester() 10164 .mr(4) 10165 .nr(16) 10166 .kr(1) 10167 .sr(1) 10168 .m(m) 10169 .n(n) 10170 .k(k) 10171 .ks(3) 10172 .iterations(1) 10173 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10174 } 10175 } 10176 } 10177 } 10178 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,n_gt_16_small_kernel)10179 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_small_kernel) { 10180 TEST_REQUIRES_ARM_NEON; 10181 for (uint32_t n = 17; n < 32; n++) { 10182 for (size_t k = 1; k <= 40; k += 9) { 10183 GemmMicrokernelTester() 10184 .mr(4) 10185 .nr(16) 10186 .kr(1) 10187 .sr(1) 10188 .m(4) 10189 .n(n) 10190 .k(k) 10191 .ks(3) 10192 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10193 } 10194 } 10195 } 10196 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,n_div_16_small_kernel)10197 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_small_kernel) { 10198 TEST_REQUIRES_ARM_NEON; 10199 for (uint32_t n = 32; n <= 48; n += 16) { 10200 for (size_t k = 1; k <= 40; k += 9) { 10201 GemmMicrokernelTester() 10202 .mr(4) 10203 .nr(16) 10204 .kr(1) 10205 .sr(1) 10206 .m(4) 10207 .n(n) 10208 .k(k) 10209 .ks(3) 10210 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10211 } 10212 } 10213 } 10214 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,strided_cm_subtile)10215 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) { 10216 TEST_REQUIRES_ARM_NEON; 10217 for (size_t k = 1; k <= 40; k += 9) { 
10218 for (uint32_t n = 1; n <= 16; n++) { 10219 for (uint32_t m = 1; m <= 4; m++) { 10220 GemmMicrokernelTester() 10221 .mr(4) 10222 .nr(16) 10223 .kr(1) 10224 .sr(1) 10225 .m(m) 10226 .n(n) 10227 .k(k) 10228 .cm_stride(19) 10229 .iterations(1) 10230 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10231 } 10232 } 10233 } 10234 } 10235 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,a_offset)10236 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, a_offset) { 10237 TEST_REQUIRES_ARM_NEON; 10238 for (size_t k = 1; k <= 40; k += 9) { 10239 GemmMicrokernelTester() 10240 .mr(4) 10241 .nr(16) 10242 .kr(1) 10243 .sr(1) 10244 .m(4) 10245 .n(16) 10246 .k(k) 10247 .ks(3) 10248 .a_offset(163) 10249 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10250 } 10251 } 10252 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,zero)10253 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, zero) { 10254 TEST_REQUIRES_ARM_NEON; 10255 for (size_t k = 1; k <= 40; k += 9) { 10256 for (uint32_t mz = 0; mz < 4; mz++) { 10257 GemmMicrokernelTester() 10258 .mr(4) 10259 .nr(16) 10260 .kr(1) 10261 .sr(1) 10262 .m(4) 10263 .n(16) 10264 .k(k) 10265 .ks(3) 10266 .a_offset(163) 10267 .zero_index(mz) 10268 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10269 } 10270 } 10271 } 10272 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,qmin)10273 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) { 10274 TEST_REQUIRES_ARM_NEON; 10275 GemmMicrokernelTester() 10276 .mr(4) 10277 .nr(16) 10278 .kr(1) 10279 .sr(1) 10280 .m(4) 10281 .n(16) 10282 .k(8) 10283 .qmin(128) 10284 
.Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10285 } 10286 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,qmax)10287 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) { 10288 TEST_REQUIRES_ARM_NEON; 10289 GemmMicrokernelTester() 10290 .mr(4) 10291 .nr(16) 10292 .kr(1) 10293 .sr(1) 10294 .m(4) 10295 .n(16) 10296 .k(8) 10297 .qmax(128) 10298 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10299 } 10300 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,strided_cm)10301 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) { 10302 TEST_REQUIRES_ARM_NEON; 10303 GemmMicrokernelTester() 10304 .mr(4) 10305 .nr(16) 10306 .kr(1) 10307 .sr(1) 10308 .m(4) 10309 .n(16) 10310 .k(8) 10311 .cm_stride(19) 10312 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10313 } 10314 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,no_a_zero_point)10315 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_a_zero_point) { 10316 TEST_REQUIRES_ARM_NEON; 10317 for (size_t k = 1; k <= 40; k += 9) { 10318 GemmMicrokernelTester() 10319 .mr(4) 10320 .nr(16) 10321 .kr(1) 10322 .sr(1) 10323 .m(4) 10324 .n(16) 10325 .k(k) 10326 .a_zero_point(0) 10327 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10328 } 10329 } 10330 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,no_b_zero_point)10331 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_b_zero_point) { 10332 TEST_REQUIRES_ARM_NEON; 10333 for (size_t k = 1; k <= 40; k += 9) { 10334 GemmMicrokernelTester() 10335 .mr(4) 10336 .nr(16) 10337 
.kr(1) 10338 .sr(1) 10339 .m(4) 10340 .n(16) 10341 .k(k) 10342 .b_zero_point(0) 10343 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10344 } 10345 } 10346 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64,no_zero_point)10347 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_zero_point) { 10348 TEST_REQUIRES_ARM_NEON; 10349 for (size_t k = 1; k <= 40; k += 9) { 10350 GemmMicrokernelTester() 10351 .mr(4) 10352 .nr(16) 10353 .kr(1) 10354 .sr(1) 10355 .m(4) 10356 .n(16) 10357 .k(k) 10358 .a_zero_point(0) 10359 .b_zero_point(0) 10360 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 10361 } 10362 } 10363 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 10364