1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/qs8-igemm-minmax-rndnu.yaml 11 // Generator: tools/generate-gemm-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/allocator.h> 17 #include <xnnpack/common.h> 18 #include <xnnpack/isa-checks.h> 19 20 #include <xnnpack/gemm.h> 21 #include <xnnpack/igemm.h> 22 #include <xnnpack/ppmm.h> 23 #include "gemm-microkernel-tester.h" 24 25 26 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,k_eq_8)27 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) { 28 TEST_REQUIRES_ARM_NEON; 29 GemmMicrokernelTester() 30 .mr(4) 31 .nr(8) 32 .kr(1) 33 .sr(1) 34 .m(4) 35 .n(8) 36 .k(8) 37 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 38 } 39 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,strided_cn)40 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) { 41 TEST_REQUIRES_ARM_NEON; 42 GemmMicrokernelTester() 43 .mr(4) 44 .nr(8) 45 .kr(1) 46 .sr(1) 47 .m(4) 48 .n(8) 49 .k(8) 50 .cn_stride(11) 51 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 52 } 53 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,k_eq_8_subtile)54 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) { 55 TEST_REQUIRES_ARM_NEON; 56 for (uint32_t n = 1; n <= 8; n++) { 57 for (uint32_t m = 1; m <= 4; m++) { 58 GemmMicrokernelTester() 59 .mr(4) 60 .nr(8) 61 .kr(1) 62 .sr(1) 63 .m(m) 64 .n(n) 65 .k(8) 66 .iterations(1) 67 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 68 } 69 } 70 } 71 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,k_eq_8_subtile_m)72 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) { 73 TEST_REQUIRES_ARM_NEON; 74 for (uint32_t m = 1; m <= 4; m++) { 75 GemmMicrokernelTester() 76 .mr(4) 77 .nr(8) 78 .kr(1) 79 .sr(1) 80 .m(m) 81 .n(8) 82 .k(8) 83 .iterations(1) 84 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 85 } 86 } 87 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,k_eq_8_subtile_n)88 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) { 89 TEST_REQUIRES_ARM_NEON; 90 for (uint32_t n = 1; n <= 8; n++) { 91 GemmMicrokernelTester() 92 .mr(4) 93 .nr(8) 94 .kr(1) 95 .sr(1) 96 .m(4) 97 .n(n) 98 .k(8) 99 .iterations(1) 100 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 101 } 102 } 103 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,k_lt_8)104 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) { 105 TEST_REQUIRES_ARM_NEON; 106 for (size_t k = 1; k < 8; k++) { 107 GemmMicrokernelTester() 108 .mr(4) 109 .nr(8) 110 .kr(1) 111 .sr(1) 112 .m(4) 113 .n(8) 114 .k(k) 115 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 116 } 117 } 118 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,k_lt_8_subtile)119 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) { 120 TEST_REQUIRES_ARM_NEON; 121 for (size_t k = 1; k < 8; k++) { 122 for (uint32_t n = 1; n <= 8; n++) { 123 for (uint32_t m = 1; m <= 4; m++) { 124 GemmMicrokernelTester() 125 .mr(4) 126 
.nr(8) 127 .kr(1) 128 .sr(1) 129 .m(m) 130 .n(n) 131 .k(k) 132 .iterations(1) 133 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 134 } 135 } 136 } 137 } 138 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,k_gt_8)139 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) { 140 TEST_REQUIRES_ARM_NEON; 141 for (size_t k = 9; k < 16; k++) { 142 GemmMicrokernelTester() 143 .mr(4) 144 .nr(8) 145 .kr(1) 146 .sr(1) 147 .m(4) 148 .n(8) 149 .k(k) 150 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 151 } 152 } 153 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,k_gt_8_subtile)154 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) { 155 TEST_REQUIRES_ARM_NEON; 156 for (size_t k = 9; k < 16; k++) { 157 for (uint32_t n = 1; n <= 8; n++) { 158 for (uint32_t m = 1; m <= 4; m++) { 159 GemmMicrokernelTester() 160 .mr(4) 161 .nr(8) 162 .kr(1) 163 .sr(1) 164 .m(m) 165 .n(n) 166 .k(k) 167 .iterations(1) 168 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 169 } 170 } 171 } 172 } 173 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,k_div_8)174 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) { 175 TEST_REQUIRES_ARM_NEON; 176 for (size_t k = 16; k <= 80; k += 8) { 177 GemmMicrokernelTester() 178 .mr(4) 179 .nr(8) 180 .kr(1) 181 .sr(1) 182 .m(4) 183 .n(8) 184 .k(k) 185 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 186 } 187 } 188 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,k_div_8_subtile)189 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) { 190 
TEST_REQUIRES_ARM_NEON; 191 for (size_t k = 16; k <= 80; k += 8) { 192 for (uint32_t n = 1; n <= 8; n++) { 193 for (uint32_t m = 1; m <= 4; m++) { 194 GemmMicrokernelTester() 195 .mr(4) 196 .nr(8) 197 .kr(1) 198 .sr(1) 199 .m(m) 200 .n(n) 201 .k(k) 202 .iterations(1) 203 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 204 } 205 } 206 } 207 } 208 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,n_gt_8)209 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_gt_8) { 210 TEST_REQUIRES_ARM_NEON; 211 for (uint32_t n = 9; n < 16; n++) { 212 for (size_t k = 1; k <= 40; k += 9) { 213 GemmMicrokernelTester() 214 .mr(4) 215 .nr(8) 216 .kr(1) 217 .sr(1) 218 .m(4) 219 .n(n) 220 .k(k) 221 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 222 } 223 } 224 } 225 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,n_gt_8_strided_cn)226 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) { 227 TEST_REQUIRES_ARM_NEON; 228 for (uint32_t n = 9; n < 16; n++) { 229 for (size_t k = 1; k <= 40; k += 9) { 230 GemmMicrokernelTester() 231 .mr(4) 232 .nr(8) 233 .kr(1) 234 .sr(1) 235 .m(4) 236 .n(n) 237 .k(k) 238 .cn_stride(11) 239 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 240 } 241 } 242 } 243 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,n_gt_8_subtile)244 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_gt_8_subtile) { 245 TEST_REQUIRES_ARM_NEON; 246 for (uint32_t n = 9; n < 16; n++) { 247 for (size_t k = 1; k <= 40; k += 9) { 248 for (uint32_t m = 1; m <= 4; m++) { 249 GemmMicrokernelTester() 250 .mr(4) 251 .nr(8) 252 .kr(1) 253 .sr(1) 254 .m(m) 255 .n(n) 256 .k(k) 257 .iterations(1) 258 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 259 } 260 } 261 } 262 } 263 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,n_div_8)264 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_div_8) { 265 TEST_REQUIRES_ARM_NEON; 266 for (uint32_t n = 16; n <= 24; n += 8) { 267 for (size_t k = 1; k <= 40; k += 9) { 268 GemmMicrokernelTester() 269 .mr(4) 270 .nr(8) 271 .kr(1) 272 .sr(1) 273 .m(4) 274 .n(n) 275 .k(k) 276 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 277 } 278 } 279 } 280 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,n_div_8_strided_cn)281 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) { 282 TEST_REQUIRES_ARM_NEON; 283 for (uint32_t n = 16; n <= 24; n += 8) { 284 for (size_t k = 1; k <= 40; k += 9) { 285 GemmMicrokernelTester() 286 .mr(4) 287 .nr(8) 288 .kr(1) 289 .sr(1) 290 .m(4) 291 .n(n) 292 .k(k) 293 .cn_stride(11) 294 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 295 } 296 } 297 } 298 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,n_div_8_subtile)299 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_div_8_subtile) { 300 TEST_REQUIRES_ARM_NEON; 301 for (uint32_t n = 16; n <= 24; n += 8) { 302 for (size_t k = 1; k <= 40; k += 9) { 303 for (uint32_t m = 1; m <= 4; m++) { 304 GemmMicrokernelTester() 305 .mr(4) 306 .nr(8) 307 .kr(1) 308 .sr(1) 309 .m(m) 310 .n(n) 311 .k(k) 312 .iterations(1) 313 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 314 } 315 } 316 } 317 } 318 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,small_kernel)319 
// Full 4x8 tile with ks = 3; k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks = 3; k = 1, 10, ..., 37; every (m, n) sub-tile; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks = 3; m = 4; n in [9, 15]; k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks = 3; m = 4; n = 16 and 24; k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride = 11; k = 1, 10, ..., 37; every (m, n) sub-tile; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with ks = 3 and a_offset = 163; k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same configuration as a_offset, additionally setting zero_index to each mz in [0, 3].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, qmin) { 451 TEST_REQUIRES_ARM_NEON; 452 GemmMicrokernelTester() 453 .mr(4) 454 .nr(8) 455 .kr(1) 456 .sr(1) 457 .m(4) 458 .n(8) 459 .k(8) 460 .qmin(128) 461 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 462 } 463 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,qmax)464 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, qmax) { 465 TEST_REQUIRES_ARM_NEON; 466 GemmMicrokernelTester() 467 .mr(4) 468 .nr(8) 469 .kr(1) 470 .sr(1) 471 .m(4) 472 .n(8) 473 .k(8) 474 .qmax(128) 475 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 476 } 477 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64,strided_cm)478 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) { 479 TEST_REQUIRES_ARM_NEON; 480 GemmMicrokernelTester() 481 .mr(4) 482 .nr(8) 483 .kr(1) 484 .sr(1) 485 .m(4) 486 .n(8) 487 .k(8) 488 .cm_stride(11) 489 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 490 } 491 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 492 493 494 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,k_eq_8)495 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8) { 496 TEST_REQUIRES_ARM_NEON; 497 GemmMicrokernelTester() 498 .mr(1) 499 .nr(8) 500 .kr(2) 501 .sr(1) 502 .m(1) 503 .n(8) 504 .k(8) 505 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 506 } 507 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,strided_cn)508 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cn) { 509 TEST_REQUIRES_ARM_NEON; 510 GemmMicrokernelTester() 511 .mr(1) 512 .nr(8) 513 .kr(2) 514 
.sr(1) 515 .m(1) 516 .n(8) 517 .k(8) 518 .cn_stride(11) 519 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 520 } 521 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,k_eq_8_subtile)522 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile) { 523 TEST_REQUIRES_ARM_NEON; 524 for (uint32_t n = 1; n <= 8; n++) { 525 for (uint32_t m = 1; m <= 1; m++) { 526 GemmMicrokernelTester() 527 .mr(1) 528 .nr(8) 529 .kr(2) 530 .sr(1) 531 .m(m) 532 .n(n) 533 .k(8) 534 .iterations(1) 535 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 536 } 537 } 538 } 539 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,k_eq_8_subtile_m)540 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) { 541 TEST_REQUIRES_ARM_NEON; 542 for (uint32_t m = 1; m <= 1; m++) { 543 GemmMicrokernelTester() 544 .mr(1) 545 .nr(8) 546 .kr(2) 547 .sr(1) 548 .m(m) 549 .n(8) 550 .k(8) 551 .iterations(1) 552 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 553 } 554 } 555 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,k_eq_8_subtile_n)556 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) { 557 TEST_REQUIRES_ARM_NEON; 558 for (uint32_t n = 1; n <= 8; n++) { 559 GemmMicrokernelTester() 560 .mr(1) 561 .nr(8) 562 .kr(2) 563 .sr(1) 564 .m(1) 565 .n(n) 566 .k(8) 567 .iterations(1) 568 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 569 } 570 } 571 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,k_lt_8)572 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_lt_8) { 573 TEST_REQUIRES_ARM_NEON; 574 for (size_t k = 1; k < 8; k++) { 575 GemmMicrokernelTester() 576 .mr(1) 577 .nr(8) 578 .kr(2) 579 .sr(1) 580 .m(1) 581 .n(8) 582 .k(k) 583 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 584 } 585 } 586 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,k_lt_8_subtile)587 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_lt_8_subtile) { 588 TEST_REQUIRES_ARM_NEON; 589 for (size_t k = 1; k < 8; k++) { 590 for (uint32_t n = 1; n <= 8; n++) { 591 for (uint32_t m = 1; m <= 1; m++) { 592 GemmMicrokernelTester() 593 .mr(1) 594 .nr(8) 595 .kr(2) 596 .sr(1) 597 .m(m) 598 .n(n) 599 .k(k) 600 .iterations(1) 601 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 602 } 603 } 604 } 605 } 606 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,k_gt_8)607 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_gt_8) { 608 TEST_REQUIRES_ARM_NEON; 609 for (size_t k = 9; k < 16; k++) { 610 GemmMicrokernelTester() 611 .mr(1) 612 .nr(8) 613 .kr(2) 614 .sr(1) 615 .m(1) 616 .n(8) 617 .k(k) 618 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 619 } 620 } 621 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,k_gt_8_subtile)622 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_gt_8_subtile) { 623 TEST_REQUIRES_ARM_NEON; 624 for (size_t k = 9; k < 16; k++) { 625 for (uint32_t n = 1; n <= 8; n++) { 626 for (uint32_t m = 1; m <= 1; m++) { 627 GemmMicrokernelTester() 628 .mr(1) 629 .nr(8) 630 .kr(2) 631 .sr(1) 632 .m(m) 633 .n(n) 634 .k(k) 635 .iterations(1) 636 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 637 } 638 } 639 } 640 } 641 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,k_div_8)642 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_div_8) { 643 TEST_REQUIRES_ARM_NEON; 644 for (size_t k = 16; k <= 80; k += 8) { 645 GemmMicrokernelTester() 646 .mr(1) 647 .nr(8) 648 .kr(2) 649 
.sr(1) 650 .m(1) 651 .n(8) 652 .k(k) 653 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 654 } 655 } 656 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,k_div_8_subtile)657 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_div_8_subtile) { 658 TEST_REQUIRES_ARM_NEON; 659 for (size_t k = 16; k <= 80; k += 8) { 660 for (uint32_t n = 1; n <= 8; n++) { 661 for (uint32_t m = 1; m <= 1; m++) { 662 GemmMicrokernelTester() 663 .mr(1) 664 .nr(8) 665 .kr(2) 666 .sr(1) 667 .m(m) 668 .n(n) 669 .k(k) 670 .iterations(1) 671 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 672 } 673 } 674 } 675 } 676 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,n_gt_8)677 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8) { 678 TEST_REQUIRES_ARM_NEON; 679 for (uint32_t n = 9; n < 16; n++) { 680 for (size_t k = 1; k <= 40; k += 9) { 681 GemmMicrokernelTester() 682 .mr(1) 683 .nr(8) 684 .kr(2) 685 .sr(1) 686 .m(1) 687 .n(n) 688 .k(k) 689 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 690 } 691 } 692 } 693 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,n_gt_8_strided_cn)694 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) { 695 TEST_REQUIRES_ARM_NEON; 696 for (uint32_t n = 9; n < 16; n++) { 697 for (size_t k = 1; k <= 40; k += 9) { 698 GemmMicrokernelTester() 699 .mr(1) 700 .nr(8) 701 .kr(2) 702 .sr(1) 703 .m(1) 704 .n(n) 705 .k(k) 706 .cn_stride(11) 707 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 708 } 709 } 710 } 711 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,n_gt_8_subtile)712 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_subtile) { 713 TEST_REQUIRES_ARM_NEON; 714 for (uint32_t n = 9; n < 
16; n++) { 715 for (size_t k = 1; k <= 40; k += 9) { 716 for (uint32_t m = 1; m <= 1; m++) { 717 GemmMicrokernelTester() 718 .mr(1) 719 .nr(8) 720 .kr(2) 721 .sr(1) 722 .m(m) 723 .n(n) 724 .k(k) 725 .iterations(1) 726 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 727 } 728 } 729 } 730 } 731 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,n_div_8)732 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8) { 733 TEST_REQUIRES_ARM_NEON; 734 for (uint32_t n = 16; n <= 24; n += 8) { 735 for (size_t k = 1; k <= 40; k += 9) { 736 GemmMicrokernelTester() 737 .mr(1) 738 .nr(8) 739 .kr(2) 740 .sr(1) 741 .m(1) 742 .n(n) 743 .k(k) 744 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 745 } 746 } 747 } 748 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,n_div_8_strided_cn)749 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) { 750 TEST_REQUIRES_ARM_NEON; 751 for (uint32_t n = 16; n <= 24; n += 8) { 752 for (size_t k = 1; k <= 40; k += 9) { 753 GemmMicrokernelTester() 754 .mr(1) 755 .nr(8) 756 .kr(2) 757 .sr(1) 758 .m(1) 759 .n(n) 760 .k(k) 761 .cn_stride(11) 762 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 763 } 764 } 765 } 766 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,n_div_8_subtile)767 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_subtile) { 768 TEST_REQUIRES_ARM_NEON; 769 for (uint32_t n = 16; n <= 24; n += 8) { 770 for (size_t k = 1; k <= 40; k += 9) { 771 for (uint32_t m = 1; m <= 1; m++) { 772 GemmMicrokernelTester() 773 .mr(1) 774 .nr(8) 775 .kr(2) 776 .sr(1) 777 .m(m) 778 .n(n) 779 .k(k) 780 .iterations(1) 781 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 782 } 783 } 784 } 
785 } 786 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,small_kernel)787 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, small_kernel) { 788 TEST_REQUIRES_ARM_NEON; 789 for (size_t k = 1; k <= 40; k += 9) { 790 GemmMicrokernelTester() 791 .mr(1) 792 .nr(8) 793 .kr(2) 794 .sr(1) 795 .m(1) 796 .n(8) 797 .k(k) 798 .ks(3) 799 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 800 } 801 } 802 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,small_kernel_subtile)803 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, small_kernel_subtile) { 804 TEST_REQUIRES_ARM_NEON; 805 for (size_t k = 1; k <= 40; k += 9) { 806 for (uint32_t n = 1; n <= 8; n++) { 807 for (uint32_t m = 1; m <= 1; m++) { 808 GemmMicrokernelTester() 809 .mr(1) 810 .nr(8) 811 .kr(2) 812 .sr(1) 813 .m(m) 814 .n(n) 815 .k(k) 816 .ks(3) 817 .iterations(1) 818 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 819 } 820 } 821 } 822 } 823 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,n_gt_8_small_kernel)824 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) { 825 TEST_REQUIRES_ARM_NEON; 826 for (uint32_t n = 9; n < 16; n++) { 827 for (size_t k = 1; k <= 40; k += 9) { 828 GemmMicrokernelTester() 829 .mr(1) 830 .nr(8) 831 .kr(2) 832 .sr(1) 833 .m(1) 834 .n(n) 835 .k(k) 836 .ks(3) 837 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 838 } 839 } 840 } 841 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,n_div_8_small_kernel)842 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) { 843 TEST_REQUIRES_ARM_NEON; 844 for (uint32_t n = 16; n <= 24; n += 8) { 845 for (size_t k = 1; k <= 40; k += 9) { 846 GemmMicrokernelTester() 847 .mr(1) 848 .nr(8) 849 .kr(2) 850 .sr(1) 851 .m(1) 852 .n(n) 853 .k(k) 854 .ks(3) 855 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 856 } 857 } 858 } 859 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,strided_cm_subtile)860 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cm_subtile) { 861 TEST_REQUIRES_ARM_NEON; 862 for (size_t k = 1; k <= 40; k += 9) { 863 for (uint32_t n = 1; n <= 8; n++) { 864 for (uint32_t m = 1; m <= 1; m++) { 865 GemmMicrokernelTester() 866 .mr(1) 867 .nr(8) 868 .kr(2) 869 .sr(1) 870 .m(m) 871 .n(n) 872 .k(k) 873 .cm_stride(11) 874 .iterations(1) 875 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 876 } 877 } 878 } 879 } 880 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,a_offset)881 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, a_offset) { 882 TEST_REQUIRES_ARM_NEON; 883 for (size_t k = 1; k <= 40; k += 9) { 884 GemmMicrokernelTester() 885 .mr(1) 886 .nr(8) 887 .kr(2) 888 .sr(1) 889 .m(1) 890 .n(8) 891 .k(k) 892 .ks(3) 893 .a_offset(43) 894 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 895 } 896 } 897 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,zero)898 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, zero) { 899 TEST_REQUIRES_ARM_NEON; 900 for (size_t k = 1; k <= 40; k += 9) { 901 for (uint32_t mz = 0; mz < 1; mz++) { 902 GemmMicrokernelTester() 903 .mr(1) 904 .nr(8) 905 .kr(2) 906 .sr(1) 907 .m(1) 908 .n(8) 909 .k(k) 910 .ks(3) 911 .a_offset(43) 912 .zero_index(mz) 913 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 914 } 915 } 916 } 917 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,qmin)918 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, qmin) { 919 TEST_REQUIRES_ARM_NEON; 920 GemmMicrokernelTester() 921 .mr(1) 922 .nr(8) 923 .kr(2) 924 
.sr(1) 925 .m(1) 926 .n(8) 927 .k(8) 928 .qmin(128) 929 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 930 } 931 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,qmax)932 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, qmax) { 933 TEST_REQUIRES_ARM_NEON; 934 GemmMicrokernelTester() 935 .mr(1) 936 .nr(8) 937 .kr(2) 938 .sr(1) 939 .m(1) 940 .n(8) 941 .k(8) 942 .qmax(128) 943 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 944 } 945 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R,strided_cm)946 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cm) { 947 TEST_REQUIRES_ARM_NEON; 948 GemmMicrokernelTester() 949 .mr(1) 950 .nr(8) 951 .kr(2) 952 .sr(1) 953 .m(1) 954 .n(8) 955 .k(8) 956 .cm_stride(11) 957 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 958 } 959 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 960 961 962 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,k_eq_16)963 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16) { 964 TEST_REQUIRES_ARM_NEON; 965 GemmMicrokernelTester() 966 .mr(1) 967 .nr(8) 968 .kr(2) 969 .sr(1) 970 .m(1) 971 .n(8) 972 .k(16) 973 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 974 } 975 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,strided_cn)976 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, strided_cn) { 977 TEST_REQUIRES_ARM_NEON; 978 GemmMicrokernelTester() 979 .mr(1) 980 .nr(8) 981 .kr(2) 982 .sr(1) 983 .m(1) 984 .n(8) 985 .k(16) 986 .cn_stride(11) 987 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 988 } 989 
// k = 16; n in [1, 8]; m in [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 16, n = 8; m in [1, 1].
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, m = 1; n varies over [1, 8].
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full tile; k varies over [1, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in [1, 15]; n in [1, 8]; m in [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile; k varies over [17, 31].
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in [17, 31]; n in [1, 8]; m in [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile; k = 32, 48, ..., 160.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 32, 48, ..., 160; n in [1, 8]; m in [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1; n in [9, 15]; k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n and k ranges as n_gt_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [9, 15]; k = 1, 18, ..., 69; m in [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1; n = 16 and 24; k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n and k ranges as n_div_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24; k = 1, 18, ..., 69; m in [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1250 } 1251 } 1252 } 1253 } 1254 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,small_kernel)1255 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, small_kernel) { 1256 TEST_REQUIRES_ARM_NEON; 1257 for (size_t k = 1; k <= 80; k += 17) { 1258 GemmMicrokernelTester() 1259 .mr(1) 1260 .nr(8) 1261 .kr(2) 1262 .sr(1) 1263 .m(1) 1264 .n(8) 1265 .k(k) 1266 .ks(3) 1267 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1268 } 1269 } 1270 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,small_kernel_subtile)1271 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, small_kernel_subtile) { 1272 TEST_REQUIRES_ARM_NEON; 1273 for (size_t k = 1; k <= 80; k += 17) { 1274 for (uint32_t n = 1; n <= 8; n++) { 1275 for (uint32_t m = 1; m <= 1; m++) { 1276 GemmMicrokernelTester() 1277 .mr(1) 1278 .nr(8) 1279 .kr(2) 1280 .sr(1) 1281 .m(m) 1282 .n(n) 1283 .k(k) 1284 .ks(3) 1285 .iterations(1) 1286 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1287 } 1288 } 1289 } 1290 } 1291 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,n_gt_8_small_kernel)1292 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) { 1293 TEST_REQUIRES_ARM_NEON; 1294 for (uint32_t n = 9; n < 16; n++) { 1295 for (size_t k = 1; k <= 80; k += 17) { 1296 GemmMicrokernelTester() 1297 .mr(1) 1298 .nr(8) 1299 .kr(2) 1300 .sr(1) 1301 .m(1) 1302 .n(n) 1303 .k(k) 1304 .ks(3) 1305 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1306 } 1307 } 1308 } 1309 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,n_div_8_small_kernel)1310 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) { 1311 
TEST_REQUIRES_ARM_NEON; 1312 for (uint32_t n = 16; n <= 24; n += 8) { 1313 for (size_t k = 1; k <= 80; k += 17) { 1314 GemmMicrokernelTester() 1315 .mr(1) 1316 .nr(8) 1317 .kr(2) 1318 .sr(1) 1319 .m(1) 1320 .n(n) 1321 .k(k) 1322 .ks(3) 1323 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1324 } 1325 } 1326 } 1327 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,strided_cm_subtile)1328 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, strided_cm_subtile) { 1329 TEST_REQUIRES_ARM_NEON; 1330 for (size_t k = 1; k <= 80; k += 17) { 1331 for (uint32_t n = 1; n <= 8; n++) { 1332 for (uint32_t m = 1; m <= 1; m++) { 1333 GemmMicrokernelTester() 1334 .mr(1) 1335 .nr(8) 1336 .kr(2) 1337 .sr(1) 1338 .m(m) 1339 .n(n) 1340 .k(k) 1341 .cm_stride(11) 1342 .iterations(1) 1343 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1344 } 1345 } 1346 } 1347 } 1348 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,a_offset)1349 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, a_offset) { 1350 TEST_REQUIRES_ARM_NEON; 1351 for (size_t k = 1; k <= 80; k += 17) { 1352 GemmMicrokernelTester() 1353 .mr(1) 1354 .nr(8) 1355 .kr(2) 1356 .sr(1) 1357 .m(1) 1358 .n(8) 1359 .k(k) 1360 .ks(3) 1361 .a_offset(83) 1362 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1363 } 1364 } 1365 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,zero)1366 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, zero) { 1367 TEST_REQUIRES_ARM_NEON; 1368 for (size_t k = 1; k <= 80; k += 17) { 1369 for (uint32_t mz = 0; mz < 1; mz++) { 1370 GemmMicrokernelTester() 1371 .mr(1) 1372 .nr(8) 1373 .kr(2) 1374 .sr(1) 1375 .m(1) 1376 .n(8) 1377 .k(k) 1378 .ks(3) 1379 .a_offset(83) 1380 .zero_index(mz) 1381 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1382 } 1383 } 1384 } 1385 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,qmin)1386 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, qmin) { 1387 TEST_REQUIRES_ARM_NEON; 1388 GemmMicrokernelTester() 1389 .mr(1) 1390 .nr(8) 1391 .kr(2) 1392 .sr(1) 1393 .m(1) 1394 .n(8) 1395 .k(16) 1396 .qmin(128) 1397 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1398 } 1399 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,qmax)1400 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, qmax) { 1401 TEST_REQUIRES_ARM_NEON; 1402 GemmMicrokernelTester() 1403 .mr(1) 1404 .nr(8) 1405 .kr(2) 1406 .sr(1) 1407 .m(1) 1408 .n(8) 1409 .k(16) 1410 .qmax(128) 1411 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1412 } 1413 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R,strided_cm)1414 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD1R, strided_cm) { 1415 TEST_REQUIRES_ARM_NEON; 1416 GemmMicrokernelTester() 1417 .mr(1) 1418 .nr(8) 1419 .kr(2) 1420 .sr(1) 1421 .m(1) 1422 .n(8) 1423 .k(16) 1424 .cm_stride(11) 1425 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1426 } 1427 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1428 1429 1430 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,k_eq_8)1431 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_eq_8) { 1432 TEST_REQUIRES_ARM_NEON; 1433 GemmMicrokernelTester() 1434 .mr(2) 1435 .nr(8) 1436 .kr(2) 1437 .sr(1) 1438 .m(2) 1439 .n(8) 1440 .k(8) 1441 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1442 } 1443 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,strided_cn)1444 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, strided_cn) { 1445 TEST_REQUIRES_ARM_NEON; 1446 GemmMicrokernelTester() 1447 .mr(2) 1448 .nr(8) 1449 .kr(2) 1450 .sr(1) 1451 .m(2) 1452 .n(8) 1453 .k(8) 1454 .cn_stride(11) 1455 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1456 } 1457 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,k_eq_8_subtile)1458 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_eq_8_subtile) { 1459 TEST_REQUIRES_ARM_NEON; 1460 for (uint32_t n = 1; n <= 8; n++) { 1461 for (uint32_t m = 1; m <= 2; m++) { 1462 GemmMicrokernelTester() 1463 .mr(2) 1464 .nr(8) 1465 .kr(2) 1466 .sr(1) 1467 .m(m) 1468 .n(n) 1469 .k(8) 1470 .iterations(1) 1471 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1472 } 1473 } 1474 } 1475 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,k_eq_8_subtile_m)1476 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_eq_8_subtile_m) { 1477 TEST_REQUIRES_ARM_NEON; 1478 for (uint32_t m = 1; m <= 2; m++) { 1479 GemmMicrokernelTester() 1480 .mr(2) 1481 .nr(8) 1482 .kr(2) 1483 .sr(1) 1484 .m(m) 1485 .n(8) 1486 .k(8) 1487 .iterations(1) 1488 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1489 } 1490 } 1491 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,k_eq_8_subtile_n)1492 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_eq_8_subtile_n) { 1493 TEST_REQUIRES_ARM_NEON; 1494 for (uint32_t n = 1; n <= 8; n++) { 1495 GemmMicrokernelTester() 1496 .mr(2) 1497 .nr(8) 1498 .kr(2) 1499 .sr(1) 1500 .m(2) 1501 .n(n) 1502 .k(8) 1503 .iterations(1) 1504 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1505 } 1506 } 1507 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,k_lt_8)1508 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_lt_8) { 1509 TEST_REQUIRES_ARM_NEON; 1510 for (size_t k = 1; k < 8; k++) { 1511 GemmMicrokernelTester() 1512 .mr(2) 1513 .nr(8) 1514 .kr(2) 1515 .sr(1) 1516 .m(2) 1517 .n(8) 1518 .k(k) 1519 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1520 } 1521 } 1522 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,k_lt_8_subtile)1523 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_lt_8_subtile) { 1524 TEST_REQUIRES_ARM_NEON; 1525 for (size_t k = 1; k < 8; k++) { 1526 for (uint32_t n = 1; n <= 8; n++) { 1527 for (uint32_t m = 1; m <= 2; m++) { 1528 GemmMicrokernelTester() 1529 .mr(2) 1530 .nr(8) 1531 .kr(2) 1532 .sr(1) 1533 .m(m) 1534 .n(n) 1535 .k(k) 1536 .iterations(1) 1537 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1538 } 1539 } 1540 } 1541 } 1542 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,k_gt_8)1543 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_gt_8) { 1544 TEST_REQUIRES_ARM_NEON; 1545 for (size_t k = 9; k < 16; k++) { 1546 GemmMicrokernelTester() 1547 .mr(2) 1548 .nr(8) 1549 .kr(2) 1550 .sr(1) 1551 .m(2) 1552 .n(8) 1553 .k(k) 1554 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1555 } 1556 } 1557 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,k_gt_8_subtile)1558 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_gt_8_subtile) { 1559 TEST_REQUIRES_ARM_NEON; 1560 for (size_t k = 9; k < 16; k++) { 1561 for (uint32_t n = 1; n <= 8; n++) { 1562 for (uint32_t m = 1; m <= 2; m++) { 1563 GemmMicrokernelTester() 1564 .mr(2) 1565 .nr(8) 1566 .kr(2) 1567 .sr(1) 1568 .m(m) 1569 .n(n) 1570 .k(k) 1571 .iterations(1) 1572 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 1573 } 1574 } 1575 } 1576 } 1577 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,k_div_8)1578 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_div_8) { 1579 TEST_REQUIRES_ARM_NEON; 1580 for (size_t k = 16; k <= 80; k += 8) { 1581 GemmMicrokernelTester() 1582 .mr(2) 1583 .nr(8) 1584 .kr(2) 1585 .sr(1) 1586 .m(2) 1587 .n(8) 1588 .k(k) 1589 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1590 } 1591 } 1592 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,k_div_8_subtile)1593 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_div_8_subtile) { 1594 TEST_REQUIRES_ARM_NEON; 1595 for (size_t k = 16; k <= 80; k += 8) { 1596 for (uint32_t n = 1; n <= 8; n++) { 1597 for (uint32_t m = 1; m <= 2; m++) { 1598 GemmMicrokernelTester() 1599 .mr(2) 1600 .nr(8) 1601 .kr(2) 1602 .sr(1) 1603 .m(m) 1604 .n(n) 1605 .k(k) 1606 .iterations(1) 1607 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1608 } 1609 } 1610 } 1611 } 1612 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,n_gt_8)1613 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_gt_8) { 1614 TEST_REQUIRES_ARM_NEON; 1615 for (uint32_t n = 9; n < 16; n++) { 1616 for (size_t k = 1; k <= 40; k += 9) { 1617 GemmMicrokernelTester() 1618 .mr(2) 1619 .nr(8) 1620 .kr(2) 1621 .sr(1) 1622 .m(2) 1623 .n(n) 1624 .k(k) 1625 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1626 } 1627 } 1628 } 1629 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,n_gt_8_strided_cn)1630 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_gt_8_strided_cn) { 1631 TEST_REQUIRES_ARM_NEON; 1632 for (uint32_t n = 9; n < 16; n++) { 1633 for (size_t k = 1; k <= 40; k += 9) { 1634 GemmMicrokernelTester() 1635 .mr(2) 1636 .nr(8) 1637 .kr(2) 1638 .sr(1) 1639 .m(2) 1640 .n(n) 1641 
.k(k) 1642 .cn_stride(11) 1643 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1644 } 1645 } 1646 } 1647 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,n_gt_8_subtile)1648 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_gt_8_subtile) { 1649 TEST_REQUIRES_ARM_NEON; 1650 for (uint32_t n = 9; n < 16; n++) { 1651 for (size_t k = 1; k <= 40; k += 9) { 1652 for (uint32_t m = 1; m <= 2; m++) { 1653 GemmMicrokernelTester() 1654 .mr(2) 1655 .nr(8) 1656 .kr(2) 1657 .sr(1) 1658 .m(m) 1659 .n(n) 1660 .k(k) 1661 .iterations(1) 1662 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1663 } 1664 } 1665 } 1666 } 1667 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,n_div_8)1668 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_div_8) { 1669 TEST_REQUIRES_ARM_NEON; 1670 for (uint32_t n = 16; n <= 24; n += 8) { 1671 for (size_t k = 1; k <= 40; k += 9) { 1672 GemmMicrokernelTester() 1673 .mr(2) 1674 .nr(8) 1675 .kr(2) 1676 .sr(1) 1677 .m(2) 1678 .n(n) 1679 .k(k) 1680 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1681 } 1682 } 1683 } 1684 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,n_div_8_strided_cn)1685 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_div_8_strided_cn) { 1686 TEST_REQUIRES_ARM_NEON; 1687 for (uint32_t n = 16; n <= 24; n += 8) { 1688 for (size_t k = 1; k <= 40; k += 9) { 1689 GemmMicrokernelTester() 1690 .mr(2) 1691 .nr(8) 1692 .kr(2) 1693 .sr(1) 1694 .m(2) 1695 .n(n) 1696 .k(k) 1697 .cn_stride(11) 1698 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1699 } 1700 } 1701 } 1702 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,n_div_8_subtile)1703 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, 
n_div_8_subtile) { 1704 TEST_REQUIRES_ARM_NEON; 1705 for (uint32_t n = 16; n <= 24; n += 8) { 1706 for (size_t k = 1; k <= 40; k += 9) { 1707 for (uint32_t m = 1; m <= 2; m++) { 1708 GemmMicrokernelTester() 1709 .mr(2) 1710 .nr(8) 1711 .kr(2) 1712 .sr(1) 1713 .m(m) 1714 .n(n) 1715 .k(k) 1716 .iterations(1) 1717 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1718 } 1719 } 1720 } 1721 } 1722 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,small_kernel)1723 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, small_kernel) { 1724 TEST_REQUIRES_ARM_NEON; 1725 for (size_t k = 1; k <= 40; k += 9) { 1726 GemmMicrokernelTester() 1727 .mr(2) 1728 .nr(8) 1729 .kr(2) 1730 .sr(1) 1731 .m(2) 1732 .n(8) 1733 .k(k) 1734 .ks(3) 1735 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1736 } 1737 } 1738 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,small_kernel_subtile)1739 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, small_kernel_subtile) { 1740 TEST_REQUIRES_ARM_NEON; 1741 for (size_t k = 1; k <= 40; k += 9) { 1742 for (uint32_t n = 1; n <= 8; n++) { 1743 for (uint32_t m = 1; m <= 2; m++) { 1744 GemmMicrokernelTester() 1745 .mr(2) 1746 .nr(8) 1747 .kr(2) 1748 .sr(1) 1749 .m(m) 1750 .n(n) 1751 .k(k) 1752 .ks(3) 1753 .iterations(1) 1754 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1755 } 1756 } 1757 } 1758 } 1759 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,n_gt_8_small_kernel)1760 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_gt_8_small_kernel) { 1761 TEST_REQUIRES_ARM_NEON; 1762 for (uint32_t n = 9; n < 16; n++) { 1763 for (size_t k = 1; k <= 40; k += 9) { 1764 GemmMicrokernelTester() 1765 .mr(2) 1766 .nr(8) 1767 .kr(2) 1768 .sr(1) 1769 .m(2) 1770 .n(n) 1771 .k(k) 1772 .ks(3) 1773 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1774 } 1775 } 1776 } 1777 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,n_div_8_small_kernel)1778 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_div_8_small_kernel) { 1779 TEST_REQUIRES_ARM_NEON; 1780 for (uint32_t n = 16; n <= 24; n += 8) { 1781 for (size_t k = 1; k <= 40; k += 9) { 1782 GemmMicrokernelTester() 1783 .mr(2) 1784 .nr(8) 1785 .kr(2) 1786 .sr(1) 1787 .m(2) 1788 .n(n) 1789 .k(k) 1790 .ks(3) 1791 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1792 } 1793 } 1794 } 1795 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,strided_cm_subtile)1796 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, strided_cm_subtile) { 1797 TEST_REQUIRES_ARM_NEON; 1798 for (size_t k = 1; k <= 40; k += 9) { 1799 for (uint32_t n = 1; n <= 8; n++) { 1800 for (uint32_t m = 1; m <= 2; m++) { 1801 GemmMicrokernelTester() 1802 .mr(2) 1803 .nr(8) 1804 .kr(2) 1805 .sr(1) 1806 .m(m) 1807 .n(n) 1808 .k(k) 1809 .cm_stride(11) 1810 .iterations(1) 1811 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1812 } 1813 } 1814 } 1815 } 1816 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,a_offset)1817 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, a_offset) { 1818 TEST_REQUIRES_ARM_NEON; 1819 for (size_t k = 1; k <= 40; k += 9) { 1820 GemmMicrokernelTester() 1821 .mr(2) 1822 .nr(8) 1823 .kr(2) 1824 .sr(1) 1825 .m(2) 1826 .n(8) 1827 .k(k) 1828 .ks(3) 1829 .a_offset(83) 1830 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1831 } 1832 } 1833 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,zero)1834 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, zero) { 1835 TEST_REQUIRES_ARM_NEON; 1836 for 
(size_t k = 1; k <= 40; k += 9) { 1837 for (uint32_t mz = 0; mz < 2; mz++) { 1838 GemmMicrokernelTester() 1839 .mr(2) 1840 .nr(8) 1841 .kr(2) 1842 .sr(1) 1843 .m(2) 1844 .n(8) 1845 .k(k) 1846 .ks(3) 1847 .a_offset(83) 1848 .zero_index(mz) 1849 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1850 } 1851 } 1852 } 1853 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,qmin)1854 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, qmin) { 1855 TEST_REQUIRES_ARM_NEON; 1856 GemmMicrokernelTester() 1857 .mr(2) 1858 .nr(8) 1859 .kr(2) 1860 .sr(1) 1861 .m(2) 1862 .n(8) 1863 .k(8) 1864 .qmin(128) 1865 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1866 } 1867 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,qmax)1868 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, qmax) { 1869 TEST_REQUIRES_ARM_NEON; 1870 GemmMicrokernelTester() 1871 .mr(2) 1872 .nr(8) 1873 .kr(2) 1874 .sr(1) 1875 .m(2) 1876 .n(8) 1877 .k(8) 1878 .qmax(128) 1879 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1880 } 1881 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R,strided_cm)1882 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, strided_cm) { 1883 TEST_REQUIRES_ARM_NEON; 1884 GemmMicrokernelTester() 1885 .mr(2) 1886 .nr(8) 1887 .kr(2) 1888 .sr(1) 1889 .m(2) 1890 .n(8) 1891 .k(8) 1892 .cm_stride(11) 1893 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1894 } 1895 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1896 1897 1898 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,k_eq_16)1899 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_eq_16) { 1900 TEST_REQUIRES_ARM_NEON; 1901 GemmMicrokernelTester() 1902 .mr(2) 1903 .nr(8) 
1904 .kr(2) 1905 .sr(1) 1906 .m(2) 1907 .n(8) 1908 .k(16) 1909 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1910 } 1911 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,strided_cn)1912 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, strided_cn) { 1913 TEST_REQUIRES_ARM_NEON; 1914 GemmMicrokernelTester() 1915 .mr(2) 1916 .nr(8) 1917 .kr(2) 1918 .sr(1) 1919 .m(2) 1920 .n(8) 1921 .k(16) 1922 .cn_stride(11) 1923 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1924 } 1925 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,k_eq_16_subtile)1926 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) { 1927 TEST_REQUIRES_ARM_NEON; 1928 for (uint32_t n = 1; n <= 8; n++) { 1929 for (uint32_t m = 1; m <= 2; m++) { 1930 GemmMicrokernelTester() 1931 .mr(2) 1932 .nr(8) 1933 .kr(2) 1934 .sr(1) 1935 .m(m) 1936 .n(n) 1937 .k(16) 1938 .iterations(1) 1939 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1940 } 1941 } 1942 } 1943 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,k_eq_16_subtile_m)1944 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 1945 TEST_REQUIRES_ARM_NEON; 1946 for (uint32_t m = 1; m <= 2; m++) { 1947 GemmMicrokernelTester() 1948 .mr(2) 1949 .nr(8) 1950 .kr(2) 1951 .sr(1) 1952 .m(m) 1953 .n(8) 1954 .k(16) 1955 .iterations(1) 1956 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1957 } 1958 } 1959 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,k_eq_16_subtile_n)1960 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 1961 TEST_REQUIRES_ARM_NEON; 1962 for (uint32_t n = 1; n <= 8; n++) { 1963 GemmMicrokernelTester() 1964 .mr(2) 1965 .nr(8) 1966 .kr(2) 1967 .sr(1) 
1968 .m(2) 1969 .n(n) 1970 .k(16) 1971 .iterations(1) 1972 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1973 } 1974 } 1975 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,k_lt_16)1976 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_lt_16) { 1977 TEST_REQUIRES_ARM_NEON; 1978 for (size_t k = 1; k < 16; k++) { 1979 GemmMicrokernelTester() 1980 .mr(2) 1981 .nr(8) 1982 .kr(2) 1983 .sr(1) 1984 .m(2) 1985 .n(8) 1986 .k(k) 1987 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1988 } 1989 } 1990 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,k_lt_16_subtile)1991 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) { 1992 TEST_REQUIRES_ARM_NEON; 1993 for (size_t k = 1; k < 16; k++) { 1994 for (uint32_t n = 1; n <= 8; n++) { 1995 for (uint32_t m = 1; m <= 2; m++) { 1996 GemmMicrokernelTester() 1997 .mr(2) 1998 .nr(8) 1999 .kr(2) 2000 .sr(1) 2001 .m(m) 2002 .n(n) 2003 .k(k) 2004 .iterations(1) 2005 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2006 } 2007 } 2008 } 2009 } 2010 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,k_gt_16)2011 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_gt_16) { 2012 TEST_REQUIRES_ARM_NEON; 2013 for (size_t k = 17; k < 32; k++) { 2014 GemmMicrokernelTester() 2015 .mr(2) 2016 .nr(8) 2017 .kr(2) 2018 .sr(1) 2019 .m(2) 2020 .n(8) 2021 .k(k) 2022 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2023 } 2024 } 2025 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,k_gt_16_subtile)2026 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) { 2027 TEST_REQUIRES_ARM_NEON; 2028 for (size_t k = 17; k < 32; k++) { 2029 for (uint32_t n = 1; n <= 8; n++) { 2030 for 
(uint32_t m = 1; m <= 2; m++) { 2031 GemmMicrokernelTester() 2032 .mr(2) 2033 .nr(8) 2034 .kr(2) 2035 .sr(1) 2036 .m(m) 2037 .n(n) 2038 .k(k) 2039 .iterations(1) 2040 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2041 } 2042 } 2043 } 2044 } 2045 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,k_div_16)2046 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_div_16) { 2047 TEST_REQUIRES_ARM_NEON; 2048 for (size_t k = 32; k <= 160; k += 16) { 2049 GemmMicrokernelTester() 2050 .mr(2) 2051 .nr(8) 2052 .kr(2) 2053 .sr(1) 2054 .m(2) 2055 .n(8) 2056 .k(k) 2057 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2058 } 2059 } 2060 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,k_div_16_subtile)2061 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, k_div_16_subtile) { 2062 TEST_REQUIRES_ARM_NEON; 2063 for (size_t k = 32; k <= 160; k += 16) { 2064 for (uint32_t n = 1; n <= 8; n++) { 2065 for (uint32_t m = 1; m <= 2; m++) { 2066 GemmMicrokernelTester() 2067 .mr(2) 2068 .nr(8) 2069 .kr(2) 2070 .sr(1) 2071 .m(m) 2072 .n(n) 2073 .k(k) 2074 .iterations(1) 2075 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2076 } 2077 } 2078 } 2079 } 2080 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,n_gt_8)2081 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_gt_8) { 2082 TEST_REQUIRES_ARM_NEON; 2083 for (uint32_t n = 9; n < 16; n++) { 2084 for (size_t k = 1; k <= 80; k += 17) { 2085 GemmMicrokernelTester() 2086 .mr(2) 2087 .nr(8) 2088 .kr(2) 2089 .sr(1) 2090 .m(2) 2091 .n(n) 2092 .k(k) 2093 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2094 } 2095 } 2096 } 2097 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,n_gt_8_strided_cn)2098 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) { 2099 TEST_REQUIRES_ARM_NEON; 2100 for (uint32_t n = 9; n < 16; n++) { 2101 for (size_t k = 1; k <= 80; k += 17) { 2102 GemmMicrokernelTester() 2103 .mr(2) 2104 .nr(8) 2105 .kr(2) 2106 .sr(1) 2107 .m(2) 2108 .n(n) 2109 .k(k) 2110 .cn_stride(11) 2111 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2112 } 2113 } 2114 } 2115 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,n_gt_8_subtile)2116 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) { 2117 TEST_REQUIRES_ARM_NEON; 2118 for (uint32_t n = 9; n < 16; n++) { 2119 for (size_t k = 1; k <= 80; k += 17) { 2120 for (uint32_t m = 1; m <= 2; m++) { 2121 GemmMicrokernelTester() 2122 .mr(2) 2123 .nr(8) 2124 .kr(2) 2125 .sr(1) 2126 .m(m) 2127 .n(n) 2128 .k(k) 2129 .iterations(1) 2130 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2131 } 2132 } 2133 } 2134 } 2135 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,n_div_8)2136 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_div_8) { 2137 TEST_REQUIRES_ARM_NEON; 2138 for (uint32_t n = 16; n <= 24; n += 8) { 2139 for (size_t k = 1; k <= 80; k += 17) { 2140 GemmMicrokernelTester() 2141 .mr(2) 2142 .nr(8) 2143 .kr(2) 2144 .sr(1) 2145 .m(2) 2146 .n(n) 2147 .k(k) 2148 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2149 } 2150 } 2151 } 2152 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,n_div_8_strided_cn)2153 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) { 2154 TEST_REQUIRES_ARM_NEON; 2155 for (uint32_t n = 16; n <= 24; n += 8) { 2156 for (size_t k = 1; k <= 80; k += 17) { 2157 GemmMicrokernelTester() 2158 .mr(2) 2159 .nr(8) 2160 .kr(2) 2161 .sr(1) 2162 .m(2) 2163 .n(n) 2164 .k(k) 2165 .cn_stride(11) 2166 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2167 } 2168 } 2169 } 2170 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,n_div_8_subtile)2171 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_div_8_subtile) { 2172 TEST_REQUIRES_ARM_NEON; 2173 for (uint32_t n = 16; n <= 24; n += 8) { 2174 for (size_t k = 1; k <= 80; k += 17) { 2175 for (uint32_t m = 1; m <= 2; m++) { 2176 GemmMicrokernelTester() 2177 .mr(2) 2178 .nr(8) 2179 .kr(2) 2180 .sr(1) 2181 .m(m) 2182 .n(n) 2183 .k(k) 2184 .iterations(1) 2185 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2186 } 2187 } 2188 } 2189 } 2190 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,small_kernel)2191 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, small_kernel) { 2192 TEST_REQUIRES_ARM_NEON; 2193 for (size_t k = 1; k <= 80; k += 17) { 2194 GemmMicrokernelTester() 2195 .mr(2) 2196 .nr(8) 2197 .kr(2) 2198 .sr(1) 2199 .m(2) 2200 .n(8) 2201 .k(k) 2202 .ks(3) 2203 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2204 } 2205 } 2206 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,small_kernel_subtile)2207 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, small_kernel_subtile) { 2208 TEST_REQUIRES_ARM_NEON; 2209 for (size_t k = 1; k <= 80; k += 17) { 2210 for (uint32_t n = 1; n <= 8; n++) { 2211 for (uint32_t m = 1; m <= 2; m++) { 2212 GemmMicrokernelTester() 2213 .mr(2) 2214 .nr(8) 2215 .kr(2) 2216 .sr(1) 2217 .m(m) 2218 .n(n) 2219 .k(k) 2220 .ks(3) 2221 .iterations(1) 2222 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2223 } 2224 } 2225 } 2226 } 2227 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,n_gt_8_small_kernel)2228 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) { 2229 TEST_REQUIRES_ARM_NEON; 2230 for (uint32_t n = 9; n < 16; n++) { 2231 for (size_t k = 1; k <= 80; k += 17) { 2232 GemmMicrokernelTester() 2233 .mr(2) 2234 .nr(8) 2235 .kr(2) 2236 .sr(1) 2237 .m(2) 2238 .n(n) 2239 .k(k) 2240 .ks(3) 2241 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2242 } 2243 } 2244 } 2245 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,n_div_8_small_kernel)2246 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) { 2247 TEST_REQUIRES_ARM_NEON; 2248 for (uint32_t n = 16; n <= 24; n += 8) { 2249 for (size_t k = 1; k <= 80; k += 17) { 2250 GemmMicrokernelTester() 2251 .mr(2) 2252 .nr(8) 2253 .kr(2) 2254 .sr(1) 2255 .m(2) 2256 .n(n) 2257 .k(k) 2258 .ks(3) 2259 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2260 } 2261 } 2262 } 2263 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,strided_cm_subtile)2264 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, strided_cm_subtile) { 2265 TEST_REQUIRES_ARM_NEON; 2266 for (size_t k = 1; k <= 80; k += 17) { 2267 for (uint32_t n = 1; n <= 8; n++) { 2268 for (uint32_t m = 1; m <= 2; m++) { 2269 GemmMicrokernelTester() 2270 .mr(2) 2271 .nr(8) 2272 .kr(2) 2273 .sr(1) 2274 .m(m) 2275 .n(n) 2276 .k(k) 2277 .cm_stride(11) 2278 .iterations(1) 2279 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2280 } 2281 } 2282 } 2283 } 2284 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,a_offset)2285 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, a_offset) { 2286 TEST_REQUIRES_ARM_NEON; 2287 for (size_t k = 1; k <= 80; k += 17) { 2288 GemmMicrokernelTester() 2289 .mr(2) 2290 .nr(8) 2291 .kr(2) 2292 .sr(1) 2293 .m(2) 2294 .n(8) 2295 .k(k) 2296 .ks(3) 2297 
.a_offset(163) 2298 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2299 } 2300 } 2301 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,zero)2302 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, zero) { 2303 TEST_REQUIRES_ARM_NEON; 2304 for (size_t k = 1; k <= 80; k += 17) { 2305 for (uint32_t mz = 0; mz < 2; mz++) { 2306 GemmMicrokernelTester() 2307 .mr(2) 2308 .nr(8) 2309 .kr(2) 2310 .sr(1) 2311 .m(2) 2312 .n(8) 2313 .k(k) 2314 .ks(3) 2315 .a_offset(163) 2316 .zero_index(mz) 2317 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2318 } 2319 } 2320 } 2321 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,qmin)2322 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, qmin) { 2323 TEST_REQUIRES_ARM_NEON; 2324 GemmMicrokernelTester() 2325 .mr(2) 2326 .nr(8) 2327 .kr(2) 2328 .sr(1) 2329 .m(2) 2330 .n(8) 2331 .k(16) 2332 .qmin(128) 2333 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2334 } 2335 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,qmax)2336 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, qmax) { 2337 TEST_REQUIRES_ARM_NEON; 2338 GemmMicrokernelTester() 2339 .mr(2) 2340 .nr(8) 2341 .kr(2) 2342 .sr(1) 2343 .m(2) 2344 .n(8) 2345 .k(16) 2346 .qmax(128) 2347 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2348 } 2349 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R,strided_cm)2350 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD2R, strided_cm) { 2351 TEST_REQUIRES_ARM_NEON; 2352 GemmMicrokernelTester() 2353 .mr(2) 2354 .nr(8) 2355 .kr(2) 2356 .sr(1) 2357 .m(2) 2358 .n(8) 2359 .k(16) 2360 .cm_stride(11) 2361 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 2362 } 2363 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2364 2365 2366 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,k_eq_8)2367 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_eq_8) { 2368 TEST_REQUIRES_ARM_NEON; 2369 GemmMicrokernelTester() 2370 .mr(3) 2371 .nr(8) 2372 .kr(2) 2373 .sr(1) 2374 .m(3) 2375 .n(8) 2376 .k(8) 2377 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2378 } 2379 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,strided_cn)2380 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, strided_cn) { 2381 TEST_REQUIRES_ARM_NEON; 2382 GemmMicrokernelTester() 2383 .mr(3) 2384 .nr(8) 2385 .kr(2) 2386 .sr(1) 2387 .m(3) 2388 .n(8) 2389 .k(8) 2390 .cn_stride(11) 2391 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2392 } 2393 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,k_eq_8_subtile)2394 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_eq_8_subtile) { 2395 TEST_REQUIRES_ARM_NEON; 2396 for (uint32_t n = 1; n <= 8; n++) { 2397 for (uint32_t m = 1; m <= 3; m++) { 2398 GemmMicrokernelTester() 2399 .mr(3) 2400 .nr(8) 2401 .kr(2) 2402 .sr(1) 2403 .m(m) 2404 .n(n) 2405 .k(8) 2406 .iterations(1) 2407 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2408 } 2409 } 2410 } 2411 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,k_eq_8_subtile_m)2412 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_eq_8_subtile_m) { 2413 TEST_REQUIRES_ARM_NEON; 2414 for (uint32_t m = 1; m <= 3; m++) { 2415 GemmMicrokernelTester() 2416 .mr(3) 2417 .nr(8) 2418 .kr(2) 2419 .sr(1) 2420 .m(m) 2421 .n(8) 2422 .k(8) 2423 .iterations(1) 2424 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 2425 } 2426 } 2427 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,k_eq_8_subtile_n)2428 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_eq_8_subtile_n) { 2429 TEST_REQUIRES_ARM_NEON; 2430 for (uint32_t n = 1; n <= 8; n++) { 2431 GemmMicrokernelTester() 2432 .mr(3) 2433 .nr(8) 2434 .kr(2) 2435 .sr(1) 2436 .m(3) 2437 .n(n) 2438 .k(8) 2439 .iterations(1) 2440 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2441 } 2442 } 2443 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,k_lt_8)2444 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_lt_8) { 2445 TEST_REQUIRES_ARM_NEON; 2446 for (size_t k = 1; k < 8; k++) { 2447 GemmMicrokernelTester() 2448 .mr(3) 2449 .nr(8) 2450 .kr(2) 2451 .sr(1) 2452 .m(3) 2453 .n(8) 2454 .k(k) 2455 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2456 } 2457 } 2458 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,k_lt_8_subtile)2459 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_lt_8_subtile) { 2460 TEST_REQUIRES_ARM_NEON; 2461 for (size_t k = 1; k < 8; k++) { 2462 for (uint32_t n = 1; n <= 8; n++) { 2463 for (uint32_t m = 1; m <= 3; m++) { 2464 GemmMicrokernelTester() 2465 .mr(3) 2466 .nr(8) 2467 .kr(2) 2468 .sr(1) 2469 .m(m) 2470 .n(n) 2471 .k(k) 2472 .iterations(1) 2473 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2474 } 2475 } 2476 } 2477 } 2478 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,k_gt_8)2479 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_gt_8) { 2480 TEST_REQUIRES_ARM_NEON; 2481 for (size_t k = 9; k < 16; k++) { 2482 GemmMicrokernelTester() 2483 .mr(3) 2484 .nr(8) 2485 .kr(2) 2486 .sr(1) 2487 .m(3) 2488 .n(8) 2489 .k(k) 2490 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2491 } 2492 } 2493 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,k_gt_8_subtile)2494 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_gt_8_subtile) { 2495 TEST_REQUIRES_ARM_NEON; 2496 for (size_t k = 9; k < 16; k++) { 2497 for (uint32_t n = 1; n <= 8; n++) { 2498 for (uint32_t m = 1; m <= 3; m++) { 2499 GemmMicrokernelTester() 2500 .mr(3) 2501 .nr(8) 2502 .kr(2) 2503 .sr(1) 2504 .m(m) 2505 .n(n) 2506 .k(k) 2507 .iterations(1) 2508 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2509 } 2510 } 2511 } 2512 } 2513 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,k_div_8)2514 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_div_8) { 2515 TEST_REQUIRES_ARM_NEON; 2516 for (size_t k = 16; k <= 80; k += 8) { 2517 GemmMicrokernelTester() 2518 .mr(3) 2519 .nr(8) 2520 .kr(2) 2521 .sr(1) 2522 .m(3) 2523 .n(8) 2524 .k(k) 2525 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2526 } 2527 } 2528 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,k_div_8_subtile)2529 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, k_div_8_subtile) { 2530 TEST_REQUIRES_ARM_NEON; 2531 for (size_t k = 16; k <= 80; k += 8) { 2532 for (uint32_t n = 1; n <= 8; n++) { 2533 for (uint32_t m = 1; m <= 3; m++) { 2534 GemmMicrokernelTester() 2535 .mr(3) 2536 .nr(8) 2537 .kr(2) 2538 .sr(1) 2539 .m(m) 2540 .n(n) 2541 .k(k) 2542 .iterations(1) 2543 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2544 } 2545 } 2546 } 2547 } 2548 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,n_gt_8)2549 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_gt_8) { 2550 TEST_REQUIRES_ARM_NEON; 2551 for (uint32_t n = 9; n < 16; n++) { 2552 for (size_t k = 1; k <= 40; k += 9) { 2553 
GemmMicrokernelTester() 2554 .mr(3) 2555 .nr(8) 2556 .kr(2) 2557 .sr(1) 2558 .m(3) 2559 .n(n) 2560 .k(k) 2561 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2562 } 2563 } 2564 } 2565 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,n_gt_8_strided_cn)2566 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_gt_8_strided_cn) { 2567 TEST_REQUIRES_ARM_NEON; 2568 for (uint32_t n = 9; n < 16; n++) { 2569 for (size_t k = 1; k <= 40; k += 9) { 2570 GemmMicrokernelTester() 2571 .mr(3) 2572 .nr(8) 2573 .kr(2) 2574 .sr(1) 2575 .m(3) 2576 .n(n) 2577 .k(k) 2578 .cn_stride(11) 2579 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2580 } 2581 } 2582 } 2583 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,n_gt_8_subtile)2584 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_gt_8_subtile) { 2585 TEST_REQUIRES_ARM_NEON; 2586 for (uint32_t n = 9; n < 16; n++) { 2587 for (size_t k = 1; k <= 40; k += 9) { 2588 for (uint32_t m = 1; m <= 3; m++) { 2589 GemmMicrokernelTester() 2590 .mr(3) 2591 .nr(8) 2592 .kr(2) 2593 .sr(1) 2594 .m(m) 2595 .n(n) 2596 .k(k) 2597 .iterations(1) 2598 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2599 } 2600 } 2601 } 2602 } 2603 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,n_div_8)2604 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_div_8) { 2605 TEST_REQUIRES_ARM_NEON; 2606 for (uint32_t n = 16; n <= 24; n += 8) { 2607 for (size_t k = 1; k <= 40; k += 9) { 2608 GemmMicrokernelTester() 2609 .mr(3) 2610 .nr(8) 2611 .kr(2) 2612 .sr(1) 2613 .m(3) 2614 .n(n) 2615 .k(k) 2616 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2617 } 2618 } 2619 } 2620 
// n in {16, 24}, k from 1 to 40 in steps of 9, m = 3, cn_stride = 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k from 1 to 40 in steps of 9, m in [1, 3]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k from 1 to 40 in steps of 9, m = 3, n = 8, ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(2).sr(1)
      .m(3).n(8).k(k_dim)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k from 1 to 40 in steps of 9, n in [1, 8], m in [1, 3], ks = 3; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [9, 15], k from 1 to 40 in steps of 9, m = 3, ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k from 1 to 40 in steps of 9, m = 3, ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// k from 1 to 40 in steps of 9, n in [1, 8], m in [1, 3], cm_stride = 11; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2752 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,a_offset)2753 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, a_offset) { 2754 TEST_REQUIRES_ARM_NEON; 2755 for (size_t k = 1; k <= 40; k += 9) { 2756 GemmMicrokernelTester() 2757 .mr(3) 2758 .nr(8) 2759 .kr(2) 2760 .sr(1) 2761 .m(3) 2762 .n(8) 2763 .k(k) 2764 .ks(3) 2765 .a_offset(127) 2766 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2767 } 2768 } 2769 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,zero)2770 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, zero) { 2771 TEST_REQUIRES_ARM_NEON; 2772 for (size_t k = 1; k <= 40; k += 9) { 2773 for (uint32_t mz = 0; mz < 3; mz++) { 2774 GemmMicrokernelTester() 2775 .mr(3) 2776 .nr(8) 2777 .kr(2) 2778 .sr(1) 2779 .m(3) 2780 .n(8) 2781 .k(k) 2782 .ks(3) 2783 .a_offset(127) 2784 .zero_index(mz) 2785 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2786 } 2787 } 2788 } 2789 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,qmin)2790 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, qmin) { 2791 TEST_REQUIRES_ARM_NEON; 2792 GemmMicrokernelTester() 2793 .mr(3) 2794 .nr(8) 2795 .kr(2) 2796 .sr(1) 2797 .m(3) 2798 .n(8) 2799 .k(8) 2800 .qmin(128) 2801 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2802 } 2803 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,qmax)2804 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, qmax) { 2805 TEST_REQUIRES_ARM_NEON; 2806 GemmMicrokernelTester() 2807 .mr(3) 2808 .nr(8) 2809 .kr(2) 2810 .sr(1) 2811 .m(3) 2812 .n(8) 2813 .k(8) 2814 .qmax(128) 2815 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2816 } 2817 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R,strided_cm)2818 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD4R, strided_cm) { 2819 TEST_REQUIRES_ARM_NEON; 2820 GemmMicrokernelTester() 2821 .mr(3) 2822 .nr(8) 2823 .kr(2) 2824 .sr(1) 2825 .m(3) 2826 .n(8) 2827 .k(8) 2828 .cm_stride(11) 2829 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2830 } 2831 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2832 2833 2834 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,k_eq_8)2835 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_eq_8) { 2836 TEST_REQUIRES_ARM_NEON; 2837 GemmMicrokernelTester() 2838 .mr(4) 2839 .nr(8) 2840 .kr(2) 2841 .sr(1) 2842 .m(4) 2843 .n(8) 2844 .k(8) 2845 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2846 } 2847 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,strided_cn)2848 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, strided_cn) { 2849 TEST_REQUIRES_ARM_NEON; 2850 GemmMicrokernelTester() 2851 .mr(4) 2852 .nr(8) 2853 .kr(2) 2854 .sr(1) 2855 .m(4) 2856 .n(8) 2857 .k(8) 2858 .cn_stride(11) 2859 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2860 } 2861 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,k_eq_8_subtile)2862 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_eq_8_subtile) { 2863 TEST_REQUIRES_ARM_NEON; 2864 for (uint32_t n = 1; n <= 8; n++) { 2865 for (uint32_t m = 1; m <= 4; m++) { 2866 GemmMicrokernelTester() 2867 .mr(4) 2868 .nr(8) 2869 .kr(2) 2870 .sr(1) 2871 .m(m) 2872 .n(n) 2873 .k(8) 2874 .iterations(1) 2875 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2876 } 2877 } 2878 } 2879 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,k_eq_8_subtile_m)2880 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, 
k_eq_8_subtile_m) { 2881 TEST_REQUIRES_ARM_NEON; 2882 for (uint32_t m = 1; m <= 4; m++) { 2883 GemmMicrokernelTester() 2884 .mr(4) 2885 .nr(8) 2886 .kr(2) 2887 .sr(1) 2888 .m(m) 2889 .n(8) 2890 .k(8) 2891 .iterations(1) 2892 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2893 } 2894 } 2895 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,k_eq_8_subtile_n)2896 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_eq_8_subtile_n) { 2897 TEST_REQUIRES_ARM_NEON; 2898 for (uint32_t n = 1; n <= 8; n++) { 2899 GemmMicrokernelTester() 2900 .mr(4) 2901 .nr(8) 2902 .kr(2) 2903 .sr(1) 2904 .m(4) 2905 .n(n) 2906 .k(8) 2907 .iterations(1) 2908 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2909 } 2910 } 2911 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,k_lt_8)2912 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_lt_8) { 2913 TEST_REQUIRES_ARM_NEON; 2914 for (size_t k = 1; k < 8; k++) { 2915 GemmMicrokernelTester() 2916 .mr(4) 2917 .nr(8) 2918 .kr(2) 2919 .sr(1) 2920 .m(4) 2921 .n(8) 2922 .k(k) 2923 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2924 } 2925 } 2926 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,k_lt_8_subtile)2927 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_lt_8_subtile) { 2928 TEST_REQUIRES_ARM_NEON; 2929 for (size_t k = 1; k < 8; k++) { 2930 for (uint32_t n = 1; n <= 8; n++) { 2931 for (uint32_t m = 1; m <= 4; m++) { 2932 GemmMicrokernelTester() 2933 .mr(4) 2934 .nr(8) 2935 .kr(2) 2936 .sr(1) 2937 .m(m) 2938 .n(n) 2939 .k(k) 2940 .iterations(1) 2941 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2942 } 2943 } 2944 } 2945 } 2946 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,k_gt_8)2947 
// k in [9, 15], m = 4, n = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k in [9, 15], n in [1, 8], m in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k from 16 to 80 in steps of 8, m = 4, n = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k_dim)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k from 16 to 80 in steps of 8, n in [1, 8], m in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [9, 15], k from 1 to 40 in steps of 9, m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [9, 15], k from 1 to 40 in steps of 9, m = 4, cn_stride = 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [9, 15], k from 1 to 40 in steps of 9, m in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in {16, 24}, k from 1 to 40 in steps of 9, m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k from 1 to 40 in steps of 9, m = 4, cn_stride = 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k from 1 to 40 in steps of 9, m in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k from 1 to 40 in steps of 9, m = 4, n = 8, ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k_dim)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, small_kernel_subtile) { 3144 TEST_REQUIRES_ARM_NEON; 3145 for (size_t k = 1; k <= 40; k += 9) { 3146 for (uint32_t n = 1; n <= 8; n++) { 3147 for (uint32_t m = 1; m <= 4; m++) { 3148 GemmMicrokernelTester() 3149 .mr(4) 3150 .nr(8) 3151 .kr(2) 3152 .sr(1) 3153 .m(m) 3154 .n(n) 3155 .k(k) 3156 .ks(3) 3157 .iterations(1) 3158 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3159 } 3160 } 3161 } 3162 } 3163 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,n_gt_8_small_kernel)3164 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_gt_8_small_kernel) { 3165 TEST_REQUIRES_ARM_NEON; 3166 for (uint32_t n = 9; n < 16; n++) { 3167 for (size_t k = 1; k <= 40; k += 9) { 3168 GemmMicrokernelTester() 3169 .mr(4) 3170 .nr(8) 3171 .kr(2) 3172 .sr(1) 3173 .m(4) 3174 .n(n) 3175 .k(k) 3176 .ks(3) 3177 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3178 } 3179 } 3180 } 3181 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,n_div_8_small_kernel)3182 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, n_div_8_small_kernel) { 3183 TEST_REQUIRES_ARM_NEON; 3184 for (uint32_t n = 16; n <= 24; n += 8) { 3185 for (size_t k = 1; k <= 40; k += 9) { 3186 GemmMicrokernelTester() 3187 .mr(4) 3188 .nr(8) 3189 .kr(2) 3190 .sr(1) 3191 .m(4) 3192 .n(n) 3193 .k(k) 3194 .ks(3) 3195 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3196 } 3197 } 3198 } 3199 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,strided_cm_subtile)3200 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, strided_cm_subtile) { 3201 TEST_REQUIRES_ARM_NEON; 3202 for (size_t k = 1; k <= 40; k += 9) { 3203 for (uint32_t n = 1; n <= 8; n++) { 3204 for (uint32_t m = 1; m <= 4; m++) { 3205 GemmMicrokernelTester() 3206 .mr(4) 3207 
.nr(8) 3208 .kr(2) 3209 .sr(1) 3210 .m(m) 3211 .n(n) 3212 .k(k) 3213 .cm_stride(11) 3214 .iterations(1) 3215 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3216 } 3217 } 3218 } 3219 } 3220 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,a_offset)3221 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, a_offset) { 3222 TEST_REQUIRES_ARM_NEON; 3223 for (size_t k = 1; k <= 40; k += 9) { 3224 GemmMicrokernelTester() 3225 .mr(4) 3226 .nr(8) 3227 .kr(2) 3228 .sr(1) 3229 .m(4) 3230 .n(8) 3231 .k(k) 3232 .ks(3) 3233 .a_offset(163) 3234 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3235 } 3236 } 3237 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,zero)3238 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, zero) { 3239 TEST_REQUIRES_ARM_NEON; 3240 for (size_t k = 1; k <= 40; k += 9) { 3241 for (uint32_t mz = 0; mz < 4; mz++) { 3242 GemmMicrokernelTester() 3243 .mr(4) 3244 .nr(8) 3245 .kr(2) 3246 .sr(1) 3247 .m(4) 3248 .n(8) 3249 .k(k) 3250 .ks(3) 3251 .a_offset(163) 3252 .zero_index(mz) 3253 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3254 } 3255 } 3256 } 3257 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,qmin)3258 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, qmin) { 3259 TEST_REQUIRES_ARM_NEON; 3260 GemmMicrokernelTester() 3261 .mr(4) 3262 .nr(8) 3263 .kr(2) 3264 .sr(1) 3265 .m(4) 3266 .n(8) 3267 .k(8) 3268 .qmin(128) 3269 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3270 } 3271 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,qmax)3272 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, qmax) { 3273 TEST_REQUIRES_ARM_NEON; 3274 GemmMicrokernelTester() 3275 .mr(4) 3276 .nr(8) 3277 .kr(2) 3278 .sr(1) 3279 
.m(4) 3280 .n(8) 3281 .k(8) 3282 .qmax(128) 3283 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3284 } 3285 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R,strided_cm)3286 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD4R, strided_cm) { 3287 TEST_REQUIRES_ARM_NEON; 3288 GemmMicrokernelTester() 3289 .mr(4) 3290 .nr(8) 3291 .kr(2) 3292 .sr(1) 3293 .m(4) 3294 .n(8) 3295 .k(8) 3296 .cm_stride(11) 3297 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3298 } 3299 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3300 3301 3302 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,k_eq_16)3303 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, k_eq_16) { 3304 TEST_REQUIRES_ARM_NEON; 3305 GemmMicrokernelTester() 3306 .mr(3) 3307 .nr(8) 3308 .kr(2) 3309 .sr(1) 3310 .m(3) 3311 .n(8) 3312 .k(16) 3313 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3314 } 3315 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,strided_cn)3316 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, strided_cn) { 3317 TEST_REQUIRES_ARM_NEON; 3318 GemmMicrokernelTester() 3319 .mr(3) 3320 .nr(8) 3321 .kr(2) 3322 .sr(1) 3323 .m(3) 3324 .n(8) 3325 .k(16) 3326 .cn_stride(11) 3327 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3328 } 3329 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,k_eq_16_subtile)3330 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) { 3331 TEST_REQUIRES_ARM_NEON; 3332 for (uint32_t n = 1; n <= 8; n++) { 3333 for (uint32_t m = 1; m <= 3; m++) { 3334 GemmMicrokernelTester() 3335 .mr(3) 3336 .nr(8) 3337 .kr(2) 3338 .sr(1) 3339 .m(m) 3340 .n(n) 3341 .k(16) 3342 .iterations(1) 3343 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3344 } 3345 } 3346 } 3347 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,k_eq_16_subtile_m)3348 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) { 3349 TEST_REQUIRES_ARM_NEON; 3350 for (uint32_t m = 1; m <= 3; m++) { 3351 GemmMicrokernelTester() 3352 .mr(3) 3353 .nr(8) 3354 .kr(2) 3355 .sr(1) 3356 .m(m) 3357 .n(8) 3358 .k(16) 3359 .iterations(1) 3360 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3361 } 3362 } 3363 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,k_eq_16_subtile_n)3364 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) { 3365 TEST_REQUIRES_ARM_NEON; 3366 for (uint32_t n = 1; n <= 8; n++) { 3367 GemmMicrokernelTester() 3368 .mr(3) 3369 .nr(8) 3370 .kr(2) 3371 .sr(1) 3372 .m(3) 3373 .n(n) 3374 .k(16) 3375 .iterations(1) 3376 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3377 } 3378 } 3379 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,k_lt_16)3380 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, k_lt_16) { 3381 TEST_REQUIRES_ARM_NEON; 3382 for (size_t k = 1; k < 16; k++) { 3383 GemmMicrokernelTester() 3384 .mr(3) 3385 .nr(8) 3386 .kr(2) 3387 .sr(1) 3388 .m(3) 3389 .n(8) 3390 .k(k) 3391 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3392 } 3393 } 3394 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,k_lt_16_subtile)3395 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) { 3396 TEST_REQUIRES_ARM_NEON; 3397 for (size_t k = 1; k < 16; k++) { 3398 for (uint32_t n = 1; n <= 8; n++) { 3399 for (uint32_t m = 1; m <= 3; m++) { 3400 GemmMicrokernelTester() 3401 .mr(3) 3402 .nr(8) 3403 .kr(2) 3404 
.sr(1) 3405 .m(m) 3406 .n(n) 3407 .k(k) 3408 .iterations(1) 3409 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3410 } 3411 } 3412 } 3413 } 3414 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,k_gt_16)3415 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, k_gt_16) { 3416 TEST_REQUIRES_ARM_NEON; 3417 for (size_t k = 17; k < 32; k++) { 3418 GemmMicrokernelTester() 3419 .mr(3) 3420 .nr(8) 3421 .kr(2) 3422 .sr(1) 3423 .m(3) 3424 .n(8) 3425 .k(k) 3426 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3427 } 3428 } 3429 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,k_gt_16_subtile)3430 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) { 3431 TEST_REQUIRES_ARM_NEON; 3432 for (size_t k = 17; k < 32; k++) { 3433 for (uint32_t n = 1; n <= 8; n++) { 3434 for (uint32_t m = 1; m <= 3; m++) { 3435 GemmMicrokernelTester() 3436 .mr(3) 3437 .nr(8) 3438 .kr(2) 3439 .sr(1) 3440 .m(m) 3441 .n(n) 3442 .k(k) 3443 .iterations(1) 3444 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3445 } 3446 } 3447 } 3448 } 3449 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,k_div_16)3450 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, k_div_16) { 3451 TEST_REQUIRES_ARM_NEON; 3452 for (size_t k = 32; k <= 160; k += 16) { 3453 GemmMicrokernelTester() 3454 .mr(3) 3455 .nr(8) 3456 .kr(2) 3457 .sr(1) 3458 .m(3) 3459 .n(8) 3460 .k(k) 3461 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3462 } 3463 } 3464 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,k_div_16_subtile)3465 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, k_div_16_subtile) { 3466 TEST_REQUIRES_ARM_NEON; 3467 for (size_t k = 32; k <= 160; k += 16) { 3468 for (uint32_t 
n = 1; n <= 8; n++) { 3469 for (uint32_t m = 1; m <= 3; m++) { 3470 GemmMicrokernelTester() 3471 .mr(3) 3472 .nr(8) 3473 .kr(2) 3474 .sr(1) 3475 .m(m) 3476 .n(n) 3477 .k(k) 3478 .iterations(1) 3479 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3480 } 3481 } 3482 } 3483 } 3484 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,n_gt_8)3485 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, n_gt_8) { 3486 TEST_REQUIRES_ARM_NEON; 3487 for (uint32_t n = 9; n < 16; n++) { 3488 for (size_t k = 1; k <= 80; k += 17) { 3489 GemmMicrokernelTester() 3490 .mr(3) 3491 .nr(8) 3492 .kr(2) 3493 .sr(1) 3494 .m(3) 3495 .n(n) 3496 .k(k) 3497 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3498 } 3499 } 3500 } 3501 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,n_gt_8_strided_cn)3502 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) { 3503 TEST_REQUIRES_ARM_NEON; 3504 for (uint32_t n = 9; n < 16; n++) { 3505 for (size_t k = 1; k <= 80; k += 17) { 3506 GemmMicrokernelTester() 3507 .mr(3) 3508 .nr(8) 3509 .kr(2) 3510 .sr(1) 3511 .m(3) 3512 .n(n) 3513 .k(k) 3514 .cn_stride(11) 3515 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3516 } 3517 } 3518 } 3519 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,n_gt_8_subtile)3520 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) { 3521 TEST_REQUIRES_ARM_NEON; 3522 for (uint32_t n = 9; n < 16; n++) { 3523 for (size_t k = 1; k <= 80; k += 17) { 3524 for (uint32_t m = 1; m <= 3; m++) { 3525 GemmMicrokernelTester() 3526 .mr(3) 3527 .nr(8) 3528 .kr(2) 3529 .sr(1) 3530 .m(m) 3531 .n(n) 3532 .k(k) 3533 .iterations(1) 3534 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
3535 } 3536 } 3537 } 3538 } 3539 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,n_div_8)3540 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, n_div_8) { 3541 TEST_REQUIRES_ARM_NEON; 3542 for (uint32_t n = 16; n <= 24; n += 8) { 3543 for (size_t k = 1; k <= 80; k += 17) { 3544 GemmMicrokernelTester() 3545 .mr(3) 3546 .nr(8) 3547 .kr(2) 3548 .sr(1) 3549 .m(3) 3550 .n(n) 3551 .k(k) 3552 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3553 } 3554 } 3555 } 3556 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,n_div_8_strided_cn)3557 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) { 3558 TEST_REQUIRES_ARM_NEON; 3559 for (uint32_t n = 16; n <= 24; n += 8) { 3560 for (size_t k = 1; k <= 80; k += 17) { 3561 GemmMicrokernelTester() 3562 .mr(3) 3563 .nr(8) 3564 .kr(2) 3565 .sr(1) 3566 .m(3) 3567 .n(n) 3568 .k(k) 3569 .cn_stride(11) 3570 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3571 } 3572 } 3573 } 3574 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,n_div_8_subtile)3575 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, n_div_8_subtile) { 3576 TEST_REQUIRES_ARM_NEON; 3577 for (uint32_t n = 16; n <= 24; n += 8) { 3578 for (size_t k = 1; k <= 80; k += 17) { 3579 for (uint32_t m = 1; m <= 3; m++) { 3580 GemmMicrokernelTester() 3581 .mr(3) 3582 .nr(8) 3583 .kr(2) 3584 .sr(1) 3585 .m(m) 3586 .n(n) 3587 .k(k) 3588 .iterations(1) 3589 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3590 } 3591 } 3592 } 3593 } 3594 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,small_kernel)3595 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, small_kernel) { 3596 TEST_REQUIRES_ARM_NEON; 3597 for (size_t k = 1; k <= 80; k += 17) { 3598 GemmMicrokernelTester() 3599 .mr(3) 3600 .nr(8) 3601 .kr(2) 3602 
.sr(1) 3603 .m(3) 3604 .n(8) 3605 .k(k) 3606 .ks(3) 3607 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3608 } 3609 } 3610 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,small_kernel_subtile)3611 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, small_kernel_subtile) { 3612 TEST_REQUIRES_ARM_NEON; 3613 for (size_t k = 1; k <= 80; k += 17) { 3614 for (uint32_t n = 1; n <= 8; n++) { 3615 for (uint32_t m = 1; m <= 3; m++) { 3616 GemmMicrokernelTester() 3617 .mr(3) 3618 .nr(8) 3619 .kr(2) 3620 .sr(1) 3621 .m(m) 3622 .n(n) 3623 .k(k) 3624 .ks(3) 3625 .iterations(1) 3626 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3627 } 3628 } 3629 } 3630 } 3631 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,n_gt_8_small_kernel)3632 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) { 3633 TEST_REQUIRES_ARM_NEON; 3634 for (uint32_t n = 9; n < 16; n++) { 3635 for (size_t k = 1; k <= 80; k += 17) { 3636 GemmMicrokernelTester() 3637 .mr(3) 3638 .nr(8) 3639 .kr(2) 3640 .sr(1) 3641 .m(3) 3642 .n(n) 3643 .k(k) 3644 .ks(3) 3645 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3646 } 3647 } 3648 } 3649 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,n_div_8_small_kernel)3650 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) { 3651 TEST_REQUIRES_ARM_NEON; 3652 for (uint32_t n = 16; n <= 24; n += 8) { 3653 for (size_t k = 1; k <= 80; k += 17) { 3654 GemmMicrokernelTester() 3655 .mr(3) 3656 .nr(8) 3657 .kr(2) 3658 .sr(1) 3659 .m(3) 3660 .n(n) 3661 .k(k) 3662 .ks(3) 3663 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3664 } 3665 } 3666 } 3667 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,strided_cm_subtile)3668 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, strided_cm_subtile) { 3669 TEST_REQUIRES_ARM_NEON; 3670 for (size_t k = 1; k <= 80; k += 17) { 3671 for (uint32_t n = 1; n <= 8; n++) { 3672 for (uint32_t m = 1; m <= 3; m++) { 3673 GemmMicrokernelTester() 3674 .mr(3) 3675 .nr(8) 3676 .kr(2) 3677 .sr(1) 3678 .m(m) 3679 .n(n) 3680 .k(k) 3681 .cm_stride(11) 3682 .iterations(1) 3683 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3684 } 3685 } 3686 } 3687 } 3688 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,a_offset)3689 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, a_offset) { 3690 TEST_REQUIRES_ARM_NEON; 3691 for (size_t k = 1; k <= 80; k += 17) { 3692 GemmMicrokernelTester() 3693 .mr(3) 3694 .nr(8) 3695 .kr(2) 3696 .sr(1) 3697 .m(3) 3698 .n(8) 3699 .k(k) 3700 .ks(3) 3701 .a_offset(251) 3702 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3703 } 3704 } 3705 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,zero)3706 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, zero) { 3707 TEST_REQUIRES_ARM_NEON; 3708 for (size_t k = 1; k <= 80; k += 17) { 3709 for (uint32_t mz = 0; mz < 3; mz++) { 3710 GemmMicrokernelTester() 3711 .mr(3) 3712 .nr(8) 3713 .kr(2) 3714 .sr(1) 3715 .m(3) 3716 .n(8) 3717 .k(k) 3718 .ks(3) 3719 .a_offset(251) 3720 .zero_index(mz) 3721 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3722 } 3723 } 3724 } 3725 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,qmin)3726 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, qmin) { 3727 TEST_REQUIRES_ARM_NEON; 3728 GemmMicrokernelTester() 3729 .mr(3) 3730 .nr(8) 3731 .kr(2) 3732 .sr(1) 3733 .m(3) 3734 .n(8) 3735 .k(16) 3736 .qmin(128) 3737 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3738 } 3739 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,qmax)3740 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, qmax) { 3741 TEST_REQUIRES_ARM_NEON; 3742 GemmMicrokernelTester() 3743 .mr(3) 3744 .nr(8) 3745 .kr(2) 3746 .sr(1) 3747 .m(3) 3748 .n(8) 3749 .k(16) 3750 .qmax(128) 3751 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3752 } 3753 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R,strided_cm)3754 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD4R, strided_cm) { 3755 TEST_REQUIRES_ARM_NEON; 3756 GemmMicrokernelTester() 3757 .mr(3) 3758 .nr(8) 3759 .kr(2) 3760 .sr(1) 3761 .m(3) 3762 .n(8) 3763 .k(16) 3764 .cm_stride(11) 3765 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3766 } 3767 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3768 3769 3770 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,k_eq_16)3771 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, k_eq_16) { 3772 TEST_REQUIRES_ARM_NEON; 3773 GemmMicrokernelTester() 3774 .mr(4) 3775 .nr(8) 3776 .kr(2) 3777 .sr(1) 3778 .m(4) 3779 .n(8) 3780 .k(16) 3781 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3782 } 3783 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,strided_cn)3784 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, strided_cn) { 3785 TEST_REQUIRES_ARM_NEON; 3786 GemmMicrokernelTester() 3787 .mr(4) 3788 .nr(8) 3789 .kr(2) 3790 .sr(1) 3791 .m(4) 3792 .n(8) 3793 .k(16) 3794 .cn_stride(11) 3795 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3796 } 3797 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,k_eq_16_subtile)3798 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) { 3799 TEST_REQUIRES_ARM_NEON; 3800 for (uint32_t n = 1; n <= 8; n++) { 3801 for (uint32_t m = 1; m <= 4; m++) { 3802 GemmMicrokernelTester() 3803 .mr(4) 3804 .nr(8) 3805 .kr(2) 3806 .sr(1) 3807 .m(m) 3808 .n(n) 3809 .k(16) 3810 .iterations(1) 3811 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3812 } 3813 } 3814 } 3815 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,k_eq_16_subtile_m)3816 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) { 3817 TEST_REQUIRES_ARM_NEON; 3818 for (uint32_t m = 1; m <= 4; m++) { 3819 GemmMicrokernelTester() 3820 .mr(4) 3821 .nr(8) 3822 .kr(2) 3823 .sr(1) 3824 .m(m) 3825 .n(8) 3826 .k(16) 3827 .iterations(1) 3828 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3829 } 3830 } 3831 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,k_eq_16_subtile_n)3832 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) { 3833 TEST_REQUIRES_ARM_NEON; 3834 for (uint32_t n = 1; n <= 8; n++) { 3835 GemmMicrokernelTester() 3836 .mr(4) 3837 .nr(8) 3838 .kr(2) 3839 .sr(1) 3840 .m(4) 3841 .n(n) 3842 .k(16) 3843 .iterations(1) 3844 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3845 } 3846 } 3847 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,k_lt_16)3848 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, k_lt_16) { 3849 TEST_REQUIRES_ARM_NEON; 3850 for (size_t k = 1; k < 16; k++) { 3851 GemmMicrokernelTester() 3852 .mr(4) 3853 .nr(8) 3854 .kr(2) 3855 .sr(1) 3856 .m(4) 3857 .n(8) 3858 .k(k) 3859 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 3860 } 3861 } 3862 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,k_lt_16_subtile)3863 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) { 3864 TEST_REQUIRES_ARM_NEON; 3865 for (size_t k = 1; k < 16; k++) { 3866 for (uint32_t n = 1; n <= 8; n++) { 3867 for (uint32_t m = 1; m <= 4; m++) { 3868 GemmMicrokernelTester() 3869 .mr(4) 3870 .nr(8) 3871 .kr(2) 3872 .sr(1) 3873 .m(m) 3874 .n(n) 3875 .k(k) 3876 .iterations(1) 3877 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3878 } 3879 } 3880 } 3881 } 3882 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,k_gt_16)3883 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, k_gt_16) { 3884 TEST_REQUIRES_ARM_NEON; 3885 for (size_t k = 17; k < 32; k++) { 3886 GemmMicrokernelTester() 3887 .mr(4) 3888 .nr(8) 3889 .kr(2) 3890 .sr(1) 3891 .m(4) 3892 .n(8) 3893 .k(k) 3894 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3895 } 3896 } 3897 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,k_gt_16_subtile)3898 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) { 3899 TEST_REQUIRES_ARM_NEON; 3900 for (size_t k = 17; k < 32; k++) { 3901 for (uint32_t n = 1; n <= 8; n++) { 3902 for (uint32_t m = 1; m <= 4; m++) { 3903 GemmMicrokernelTester() 3904 .mr(4) 3905 .nr(8) 3906 .kr(2) 3907 .sr(1) 3908 .m(m) 3909 .n(n) 3910 .k(k) 3911 .iterations(1) 3912 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3913 } 3914 } 3915 } 3916 } 3917 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,k_div_16)3918 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, k_div_16) { 3919 TEST_REQUIRES_ARM_NEON; 3920 for (size_t k = 32; k <= 160; k += 16) { 3921 GemmMicrokernelTester() 3922 .mr(4) 3923 .nr(8) 3924 .kr(2) 3925 .sr(1) 3926 .m(4) 3927 .n(8) 
3928 .k(k) 3929 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3930 } 3931 } 3932 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,k_div_16_subtile)3933 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, k_div_16_subtile) { 3934 TEST_REQUIRES_ARM_NEON; 3935 for (size_t k = 32; k <= 160; k += 16) { 3936 for (uint32_t n = 1; n <= 8; n++) { 3937 for (uint32_t m = 1; m <= 4; m++) { 3938 GemmMicrokernelTester() 3939 .mr(4) 3940 .nr(8) 3941 .kr(2) 3942 .sr(1) 3943 .m(m) 3944 .n(n) 3945 .k(k) 3946 .iterations(1) 3947 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3948 } 3949 } 3950 } 3951 } 3952 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,n_gt_8)3953 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, n_gt_8) { 3954 TEST_REQUIRES_ARM_NEON; 3955 for (uint32_t n = 9; n < 16; n++) { 3956 for (size_t k = 1; k <= 80; k += 17) { 3957 GemmMicrokernelTester() 3958 .mr(4) 3959 .nr(8) 3960 .kr(2) 3961 .sr(1) 3962 .m(4) 3963 .n(n) 3964 .k(k) 3965 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3966 } 3967 } 3968 } 3969 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,n_gt_8_strided_cn)3970 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) { 3971 TEST_REQUIRES_ARM_NEON; 3972 for (uint32_t n = 9; n < 16; n++) { 3973 for (size_t k = 1; k <= 80; k += 17) { 3974 GemmMicrokernelTester() 3975 .mr(4) 3976 .nr(8) 3977 .kr(2) 3978 .sr(1) 3979 .m(4) 3980 .n(n) 3981 .k(k) 3982 .cn_stride(11) 3983 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3984 } 3985 } 3986 } 3987 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,n_gt_8_subtile)3988 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) { 3989 
TEST_REQUIRES_ARM_NEON; 3990 for (uint32_t n = 9; n < 16; n++) { 3991 for (size_t k = 1; k <= 80; k += 17) { 3992 for (uint32_t m = 1; m <= 4; m++) { 3993 GemmMicrokernelTester() 3994 .mr(4) 3995 .nr(8) 3996 .kr(2) 3997 .sr(1) 3998 .m(m) 3999 .n(n) 4000 .k(k) 4001 .iterations(1) 4002 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4003 } 4004 } 4005 } 4006 } 4007 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,n_div_8)4008 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, n_div_8) { 4009 TEST_REQUIRES_ARM_NEON; 4010 for (uint32_t n = 16; n <= 24; n += 8) { 4011 for (size_t k = 1; k <= 80; k += 17) { 4012 GemmMicrokernelTester() 4013 .mr(4) 4014 .nr(8) 4015 .kr(2) 4016 .sr(1) 4017 .m(4) 4018 .n(n) 4019 .k(k) 4020 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4021 } 4022 } 4023 } 4024 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,n_div_8_strided_cn)4025 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) { 4026 TEST_REQUIRES_ARM_NEON; 4027 for (uint32_t n = 16; n <= 24; n += 8) { 4028 for (size_t k = 1; k <= 80; k += 17) { 4029 GemmMicrokernelTester() 4030 .mr(4) 4031 .nr(8) 4032 .kr(2) 4033 .sr(1) 4034 .m(4) 4035 .n(n) 4036 .k(k) 4037 .cn_stride(11) 4038 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4039 } 4040 } 4041 } 4042 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,n_div_8_subtile)4043 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, n_div_8_subtile) { 4044 TEST_REQUIRES_ARM_NEON; 4045 for (uint32_t n = 16; n <= 24; n += 8) { 4046 for (size_t k = 1; k <= 80; k += 17) { 4047 for (uint32_t m = 1; m <= 4; m++) { 4048 GemmMicrokernelTester() 4049 .mr(4) 4050 .nr(8) 4051 .kr(2) 4052 .sr(1) 4053 .m(m) 4054 .n(n) 4055 .k(k) 4056 .iterations(1) 4057 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4058 } 4059 } 4060 } 4061 } 4062 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,small_kernel)4063 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, small_kernel) { 4064 TEST_REQUIRES_ARM_NEON; 4065 for (size_t k = 1; k <= 80; k += 17) { 4066 GemmMicrokernelTester() 4067 .mr(4) 4068 .nr(8) 4069 .kr(2) 4070 .sr(1) 4071 .m(4) 4072 .n(8) 4073 .k(k) 4074 .ks(3) 4075 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4076 } 4077 } 4078 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,small_kernel_subtile)4079 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, small_kernel_subtile) { 4080 TEST_REQUIRES_ARM_NEON; 4081 for (size_t k = 1; k <= 80; k += 17) { 4082 for (uint32_t n = 1; n <= 8; n++) { 4083 for (uint32_t m = 1; m <= 4; m++) { 4084 GemmMicrokernelTester() 4085 .mr(4) 4086 .nr(8) 4087 .kr(2) 4088 .sr(1) 4089 .m(m) 4090 .n(n) 4091 .k(k) 4092 .ks(3) 4093 .iterations(1) 4094 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4095 } 4096 } 4097 } 4098 } 4099 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,n_gt_8_small_kernel)4100 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) { 4101 TEST_REQUIRES_ARM_NEON; 4102 for (uint32_t n = 9; n < 16; n++) { 4103 for (size_t k = 1; k <= 80; k += 17) { 4104 GemmMicrokernelTester() 4105 .mr(4) 4106 .nr(8) 4107 .kr(2) 4108 .sr(1) 4109 .m(4) 4110 .n(n) 4111 .k(k) 4112 .ks(3) 4113 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4114 } 4115 } 4116 } 4117 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,n_div_8_small_kernel)4118 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) { 4119 
TEST_REQUIRES_ARM_NEON; 4120 for (uint32_t n = 16; n <= 24; n += 8) { 4121 for (size_t k = 1; k <= 80; k += 17) { 4122 GemmMicrokernelTester() 4123 .mr(4) 4124 .nr(8) 4125 .kr(2) 4126 .sr(1) 4127 .m(4) 4128 .n(n) 4129 .k(k) 4130 .ks(3) 4131 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4132 } 4133 } 4134 } 4135 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,strided_cm_subtile)4136 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, strided_cm_subtile) { 4137 TEST_REQUIRES_ARM_NEON; 4138 for (size_t k = 1; k <= 80; k += 17) { 4139 for (uint32_t n = 1; n <= 8; n++) { 4140 for (uint32_t m = 1; m <= 4; m++) { 4141 GemmMicrokernelTester() 4142 .mr(4) 4143 .nr(8) 4144 .kr(2) 4145 .sr(1) 4146 .m(m) 4147 .n(n) 4148 .k(k) 4149 .cm_stride(11) 4150 .iterations(1) 4151 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4152 } 4153 } 4154 } 4155 } 4156 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,a_offset)4157 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, a_offset) { 4158 TEST_REQUIRES_ARM_NEON; 4159 for (size_t k = 1; k <= 80; k += 17) { 4160 GemmMicrokernelTester() 4161 .mr(4) 4162 .nr(8) 4163 .kr(2) 4164 .sr(1) 4165 .m(4) 4166 .n(8) 4167 .k(k) 4168 .ks(3) 4169 .a_offset(331) 4170 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4171 } 4172 } 4173 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,zero)4174 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, zero) { 4175 TEST_REQUIRES_ARM_NEON; 4176 for (size_t k = 1; k <= 80; k += 17) { 4177 for (uint32_t mz = 0; mz < 4; mz++) { 4178 GemmMicrokernelTester() 4179 .mr(4) 4180 .nr(8) 4181 .kr(2) 4182 .sr(1) 4183 .m(4) 4184 .n(8) 4185 .k(k) 4186 .ks(3) 4187 .a_offset(331) 4188 .zero_index(mz) 4189 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4190 } 4191 } 4192 } 4193 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,qmin)4194 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, qmin) { 4195 TEST_REQUIRES_ARM_NEON; 4196 GemmMicrokernelTester() 4197 .mr(4) 4198 .nr(8) 4199 .kr(2) 4200 .sr(1) 4201 .m(4) 4202 .n(8) 4203 .k(16) 4204 .qmin(128) 4205 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4206 } 4207 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,qmax)4208 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, qmax) { 4209 TEST_REQUIRES_ARM_NEON; 4210 GemmMicrokernelTester() 4211 .mr(4) 4212 .nr(8) 4213 .kr(2) 4214 .sr(1) 4215 .m(4) 4216 .n(8) 4217 .k(16) 4218 .qmax(128) 4219 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4220 } 4221 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R,strided_cm)4222 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD4R, strided_cm) { 4223 TEST_REQUIRES_ARM_NEON; 4224 GemmMicrokernelTester() 4225 .mr(4) 4226 .nr(8) 4227 .kr(2) 4228 .sr(1) 4229 .m(4) 4230 .n(8) 4231 .k(16) 4232 .cm_stride(11) 4233 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4234 } 4235 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 4236 4237 4238 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL,k_eq_8)4239 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8) { 4240 TEST_REQUIRES_ARM_NEON; 4241 GemmMicrokernelTester() 4242 .mr(1) 4243 .nr(8) 4244 .kr(4) 4245 .sr(2) 4246 .m(1) 4247 .n(8) 4248 .k(8) 4249 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4250 } 4251 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL,strided_cn)4252 
// Full 1x8 tile at k = 8 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(4)
    .sr(2)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Every m x n sub-tile (m = 1 only, n in 1..8) at k = 8; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(4)
        .sr(2)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m swept over its only value (1) with n fixed at 8 and k = 8; one iteration.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(4)
      .sr(2)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// n swept over 1..8 with m fixed at 1 and k = 8; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(4)
      .sr(2)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 1x8 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(4)
      .sr(2)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every m x n sub-tile for every k in 1..7; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 1x8 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(4)
      .sr(2)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every m x n sub-tile for every k in 9..15; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
// Tests for xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull
// (mr=1, nr=8, kr=4, sr=2): K-multiple, wide-N and small-kernel sweeps.

// K takes multiples of 8 from 16 to 80 with m == mr and n == nr.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same K sweep, but over every (m, n) with m <= 1 and n <= 8; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// N runs from 9 to 15 (above nr) for K in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_gt_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_gt_8, additionally looping m up to mr; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// N is 16 and 24 (multiples of nr) for K in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_div_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_div_8, additionally looping m up to mr; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks is set to 3 with m == mr and n == nr, K in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks is set to 3 over every (m, n) with m <= 1 and n <= 8; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks is set to 3 with N running from 9 to 15.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL,n_div_8_small_kernel)4586 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_small_kernel) { 4587 TEST_REQUIRES_ARM_NEON; 4588 for (uint32_t n = 16; n <= 24; n += 8) { 4589 for (size_t k = 1; k <= 40; k += 9) { 4590 GemmMicrokernelTester() 4591 .mr(1) 4592 .nr(8) 4593 .kr(4) 4594 .sr(2) 4595 .m(1) 4596 .n(n) 4597 .k(k) 4598 .ks(3) 4599 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4600 } 4601 } 4602 } 4603 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL,strided_cm_subtile)4604 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cm_subtile) { 4605 TEST_REQUIRES_ARM_NEON; 4606 for (size_t k = 1; k <= 40; k += 9) { 4607 for (uint32_t n = 1; n <= 8; n++) { 4608 for (uint32_t m = 1; m <= 1; m++) { 4609 GemmMicrokernelTester() 4610 .mr(1) 4611 .nr(8) 4612 .kr(4) 4613 .sr(2) 4614 .m(m) 4615 .n(n) 4616 .k(k) 4617 .cm_stride(11) 4618 .iterations(1) 4619 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4620 } 4621 } 4622 } 4623 } 4624 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL,a_offset)4625 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, a_offset) { 4626 TEST_REQUIRES_ARM_NEON; 4627 for (size_t k = 1; k <= 40; k += 9) { 4628 GemmMicrokernelTester() 4629 .mr(1) 4630 .nr(8) 4631 .kr(4) 4632 .sr(2) 4633 .m(1) 4634 .n(8) 4635 .k(k) 4636 .ks(3) 4637 .a_offset(43) 4638 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4639 } 4640 } 4641 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL,zero)4642 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, zero) { 4643 TEST_REQUIRES_ARM_NEON; 4644 for (size_t k = 1; k <= 40; k += 9) { 4645 for (uint32_t mz = 0; mz < 1; mz++) { 4646 GemmMicrokernelTester() 4647 .mr(1) 4648 .nr(8) 4649 .kr(4) 4650 .sr(2) 4651 .m(1) 4652 .n(8) 4653 .k(k) 4654 
.ks(3) 4655 .a_offset(43) 4656 .zero_index(mz) 4657 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4658 } 4659 } 4660 } 4661 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL,qmin)4662 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, qmin) { 4663 TEST_REQUIRES_ARM_NEON; 4664 GemmMicrokernelTester() 4665 .mr(1) 4666 .nr(8) 4667 .kr(4) 4668 .sr(2) 4669 .m(1) 4670 .n(8) 4671 .k(8) 4672 .qmin(128) 4673 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4674 } 4675 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL,qmax)4676 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, qmax) { 4677 TEST_REQUIRES_ARM_NEON; 4678 GemmMicrokernelTester() 4679 .mr(1) 4680 .nr(8) 4681 .kr(4) 4682 .sr(2) 4683 .m(1) 4684 .n(8) 4685 .k(8) 4686 .qmax(128) 4687 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4688 } 4689 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL,strided_cm)4690 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cm) { 4691 TEST_REQUIRES_ARM_NEON; 4692 GemmMicrokernelTester() 4693 .mr(1) 4694 .nr(8) 4695 .kr(4) 4696 .sr(2) 4697 .m(1) 4698 .n(8) 4699 .k(8) 4700 .cm_stride(11) 4701 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4702 } 4703 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 4704 4705 4706 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_eq_8)4707 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8) { 4708 TEST_REQUIRES_ARM_NEON; 4709 GemmMicrokernelTester() 4710 .mr(4) 4711 .nr(16) 4712 .kr(4) 4713 .sr(2) 4714 .m(4) 4715 .n(16) 4716 .k(8) 4717 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4718 } 4719 
// Tests for xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull
// (mr=4, nr=16, kr=4, sr=2) at K = 8.

// Full tile with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(2)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Every (m, n) with m <= 4 and n <= 16; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(2)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m varies from 1 to 4 while n stays at nr; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(m).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// n varies from 1 to 16 while m stays at mr; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Tests for xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull
// (mr=4, nr=16, kr=4, sr=2): K and N sweeps.

// K runs from 1 to 7 on the full tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// K runs from 1 to 7 over every (m, n) with m <= 4 and n <= 16; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(2)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// K runs from 9 to 15 on the full tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// K runs from 9 to 15 over every (m, n) with m <= 4 and n <= 16; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(2)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// K takes multiples of 8 from 16 to 80 on the full tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same K sweep over every (m, n) with m <= 4 and n <= 16; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(2)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// N runs from 17 to 31 (above nr) for K in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(2)
        .m(4).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(2)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_gt_16, additionally looping m from 1 to 4; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(2)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// N is 32 and 48 (multiples of nr) for K in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(2)
        .m(4).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(2)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16_subtile) { 4980 TEST_REQUIRES_ARM_NEON; 4981 for (uint32_t n = 32; n <= 48; n += 16) { 4982 for (size_t k = 1; k <= 40; k += 9) { 4983 for (uint32_t m = 1; m <= 4; m++) { 4984 GemmMicrokernelTester() 4985 .mr(4) 4986 .nr(16) 4987 .kr(4) 4988 .sr(2) 4989 .m(m) 4990 .n(n) 4991 .k(k) 4992 .iterations(1) 4993 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4994 } 4995 } 4996 } 4997 } 4998 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,small_kernel)4999 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, small_kernel) { 5000 TEST_REQUIRES_ARM_NEON; 5001 for (size_t k = 1; k <= 40; k += 9) { 5002 GemmMicrokernelTester() 5003 .mr(4) 5004 .nr(16) 5005 .kr(4) 5006 .sr(2) 5007 .m(4) 5008 .n(16) 5009 .k(k) 5010 .ks(3) 5011 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5012 } 5013 } 5014 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,small_kernel_subtile)5015 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, small_kernel_subtile) { 5016 TEST_REQUIRES_ARM_NEON; 5017 for (size_t k = 1; k <= 40; k += 9) { 5018 for (uint32_t n = 1; n <= 16; n++) { 5019 for (uint32_t m = 1; m <= 4; m++) { 5020 GemmMicrokernelTester() 5021 .mr(4) 5022 .nr(16) 5023 .kr(4) 5024 .sr(2) 5025 .m(m) 5026 .n(n) 5027 .k(k) 5028 .ks(3) 5029 .iterations(1) 5030 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5031 } 5032 } 5033 } 5034 } 5035 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_gt_16_small_kernel)5036 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16_small_kernel) { 5037 TEST_REQUIRES_ARM_NEON; 5038 for (uint32_t n = 17; n < 32; n++) { 5039 for (size_t k = 1; k <= 40; k += 9) { 5040 GemmMicrokernelTester() 5041 .mr(4) 5042 .nr(16) 5043 .kr(4) 5044 .sr(2) 5045 .m(4) 5046 .n(n) 5047 
.k(k) 5048 .ks(3) 5049 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5050 } 5051 } 5052 } 5053 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_div_16_small_kernel)5054 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16_small_kernel) { 5055 TEST_REQUIRES_ARM_NEON; 5056 for (uint32_t n = 32; n <= 48; n += 16) { 5057 for (size_t k = 1; k <= 40; k += 9) { 5058 GemmMicrokernelTester() 5059 .mr(4) 5060 .nr(16) 5061 .kr(4) 5062 .sr(2) 5063 .m(4) 5064 .n(n) 5065 .k(k) 5066 .ks(3) 5067 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5068 } 5069 } 5070 } 5071 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,strided_cm_subtile)5072 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, strided_cm_subtile) { 5073 TEST_REQUIRES_ARM_NEON; 5074 for (size_t k = 1; k <= 40; k += 9) { 5075 for (uint32_t n = 1; n <= 16; n++) { 5076 for (uint32_t m = 1; m <= 4; m++) { 5077 GemmMicrokernelTester() 5078 .mr(4) 5079 .nr(16) 5080 .kr(4) 5081 .sr(2) 5082 .m(m) 5083 .n(n) 5084 .k(k) 5085 .cm_stride(19) 5086 .iterations(1) 5087 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5088 } 5089 } 5090 } 5091 } 5092 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,a_offset)5093 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, a_offset) { 5094 TEST_REQUIRES_ARM_NEON; 5095 for (size_t k = 1; k <= 40; k += 9) { 5096 GemmMicrokernelTester() 5097 .mr(4) 5098 .nr(16) 5099 .kr(4) 5100 .sr(2) 5101 .m(4) 5102 .n(16) 5103 .k(k) 5104 .ks(3) 5105 .a_offset(163) 5106 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5107 } 5108 } 5109 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,zero)5110 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, zero) { 5111 TEST_REQUIRES_ARM_NEON; 
5112 for (size_t k = 1; k <= 40; k += 9) { 5113 for (uint32_t mz = 0; mz < 4; mz++) { 5114 GemmMicrokernelTester() 5115 .mr(4) 5116 .nr(16) 5117 .kr(4) 5118 .sr(2) 5119 .m(4) 5120 .n(16) 5121 .k(k) 5122 .ks(3) 5123 .a_offset(163) 5124 .zero_index(mz) 5125 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5126 } 5127 } 5128 } 5129 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,qmin)5130 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, qmin) { 5131 TEST_REQUIRES_ARM_NEON; 5132 GemmMicrokernelTester() 5133 .mr(4) 5134 .nr(16) 5135 .kr(4) 5136 .sr(2) 5137 .m(4) 5138 .n(16) 5139 .k(8) 5140 .qmin(128) 5141 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5142 } 5143 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,qmax)5144 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, qmax) { 5145 TEST_REQUIRES_ARM_NEON; 5146 GemmMicrokernelTester() 5147 .mr(4) 5148 .nr(16) 5149 .kr(4) 5150 .sr(2) 5151 .m(4) 5152 .n(16) 5153 .k(8) 5154 .qmax(128) 5155 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5156 } 5157 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,strided_cm)5158 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, strided_cm) { 5159 TEST_REQUIRES_ARM_NEON; 5160 GemmMicrokernelTester() 5161 .mr(4) 5162 .nr(16) 5163 .kr(4) 5164 .sr(2) 5165 .m(4) 5166 .n(16) 5167 .k(8) 5168 .cm_stride(19) 5169 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5170 } 5171 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5172 5173 5174 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,k_eq_16)5175 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_eq_16) { 5176 TEST_REQUIRES_ARM_NEON; 5177 GemmMicrokernelTester() 5178 .mr(2) 5179 .nr(8) 5180 
.kr(4) 5181 .sr(2) 5182 .m(2) 5183 .n(8) 5184 .k(16) 5185 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5186 } 5187 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,strided_cn)5188 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, strided_cn) { 5189 TEST_REQUIRES_ARM_NEON; 5190 GemmMicrokernelTester() 5191 .mr(2) 5192 .nr(8) 5193 .kr(4) 5194 .sr(2) 5195 .m(2) 5196 .n(8) 5197 .k(16) 5198 .cn_stride(11) 5199 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5200 } 5201 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,k_eq_16_subtile)5202 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_eq_16_subtile) { 5203 TEST_REQUIRES_ARM_NEON; 5204 for (uint32_t n = 1; n <= 8; n++) { 5205 for (uint32_t m = 1; m <= 2; m++) { 5206 GemmMicrokernelTester() 5207 .mr(2) 5208 .nr(8) 5209 .kr(4) 5210 .sr(2) 5211 .m(m) 5212 .n(n) 5213 .k(16) 5214 .iterations(1) 5215 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5216 } 5217 } 5218 } 5219 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,k_eq_16_subtile_m)5220 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_eq_16_subtile_m) { 5221 TEST_REQUIRES_ARM_NEON; 5222 for (uint32_t m = 1; m <= 2; m++) { 5223 GemmMicrokernelTester() 5224 .mr(2) 5225 .nr(8) 5226 .kr(4) 5227 .sr(2) 5228 .m(m) 5229 .n(8) 5230 .k(16) 5231 .iterations(1) 5232 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5233 } 5234 } 5235 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,k_eq_16_subtile_n)5236 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_eq_16_subtile_n) { 5237 TEST_REQUIRES_ARM_NEON; 5238 for (uint32_t n = 1; n <= 8; n++) { 5239 GemmMicrokernelTester() 5240 .mr(2) 5241 .nr(8) 5242 .kr(4) 5243 .sr(2) 5244 .m(2) 5245 .n(n) 5246 .k(16) 5247 
.iterations(1) 5248 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5249 } 5250 } 5251 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,k_lt_16)5252 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_lt_16) { 5253 TEST_REQUIRES_ARM_NEON; 5254 for (size_t k = 1; k < 16; k++) { 5255 GemmMicrokernelTester() 5256 .mr(2) 5257 .nr(8) 5258 .kr(4) 5259 .sr(2) 5260 .m(2) 5261 .n(8) 5262 .k(k) 5263 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5264 } 5265 } 5266 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,k_lt_16_subtile)5267 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_lt_16_subtile) { 5268 TEST_REQUIRES_ARM_NEON; 5269 for (size_t k = 1; k < 16; k++) { 5270 for (uint32_t n = 1; n <= 8; n++) { 5271 for (uint32_t m = 1; m <= 2; m++) { 5272 GemmMicrokernelTester() 5273 .mr(2) 5274 .nr(8) 5275 .kr(4) 5276 .sr(2) 5277 .m(m) 5278 .n(n) 5279 .k(k) 5280 .iterations(1) 5281 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5282 } 5283 } 5284 } 5285 } 5286 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,k_gt_16)5287 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_gt_16) { 5288 TEST_REQUIRES_ARM_NEON; 5289 for (size_t k = 17; k < 32; k++) { 5290 GemmMicrokernelTester() 5291 .mr(2) 5292 .nr(8) 5293 .kr(4) 5294 .sr(2) 5295 .m(2) 5296 .n(8) 5297 .k(k) 5298 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5299 } 5300 } 5301 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,k_gt_16_subtile)5302 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_gt_16_subtile) { 5303 TEST_REQUIRES_ARM_NEON; 5304 for (size_t k = 17; k < 32; k++) { 5305 for (uint32_t n = 1; n <= 8; n++) { 5306 for (uint32_t m = 1; m <= 2; m++) { 5307 GemmMicrokernelTester() 5308 .mr(2) 5309 
.nr(8) 5310 .kr(4) 5311 .sr(2) 5312 .m(m) 5313 .n(n) 5314 .k(k) 5315 .iterations(1) 5316 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5317 } 5318 } 5319 } 5320 } 5321 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,k_div_16)5322 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_div_16) { 5323 TEST_REQUIRES_ARM_NEON; 5324 for (size_t k = 32; k <= 160; k += 16) { 5325 GemmMicrokernelTester() 5326 .mr(2) 5327 .nr(8) 5328 .kr(4) 5329 .sr(2) 5330 .m(2) 5331 .n(8) 5332 .k(k) 5333 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5334 } 5335 } 5336 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,k_div_16_subtile)5337 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, k_div_16_subtile) { 5338 TEST_REQUIRES_ARM_NEON; 5339 for (size_t k = 32; k <= 160; k += 16) { 5340 for (uint32_t n = 1; n <= 8; n++) { 5341 for (uint32_t m = 1; m <= 2; m++) { 5342 GemmMicrokernelTester() 5343 .mr(2) 5344 .nr(8) 5345 .kr(4) 5346 .sr(2) 5347 .m(m) 5348 .n(n) 5349 .k(k) 5350 .iterations(1) 5351 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5352 } 5353 } 5354 } 5355 } 5356 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,n_gt_8)5357 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_gt_8) { 5358 TEST_REQUIRES_ARM_NEON; 5359 for (uint32_t n = 9; n < 16; n++) { 5360 for (size_t k = 1; k <= 80; k += 17) { 5361 GemmMicrokernelTester() 5362 .mr(2) 5363 .nr(8) 5364 .kr(4) 5365 .sr(2) 5366 .m(2) 5367 .n(n) 5368 .k(k) 5369 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5370 } 5371 } 5372 } 5373 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,n_gt_8_strided_cn)5374 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_gt_8_strided_cn) { 5375 TEST_REQUIRES_ARM_NEON; 5376 for 
(uint32_t n = 9; n < 16; n++) { 5377 for (size_t k = 1; k <= 80; k += 17) { 5378 GemmMicrokernelTester() 5379 .mr(2) 5380 .nr(8) 5381 .kr(4) 5382 .sr(2) 5383 .m(2) 5384 .n(n) 5385 .k(k) 5386 .cn_stride(11) 5387 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5388 } 5389 } 5390 } 5391 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,n_gt_8_subtile)5392 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_gt_8_subtile) { 5393 TEST_REQUIRES_ARM_NEON; 5394 for (uint32_t n = 9; n < 16; n++) { 5395 for (size_t k = 1; k <= 80; k += 17) { 5396 for (uint32_t m = 1; m <= 2; m++) { 5397 GemmMicrokernelTester() 5398 .mr(2) 5399 .nr(8) 5400 .kr(4) 5401 .sr(2) 5402 .m(m) 5403 .n(n) 5404 .k(k) 5405 .iterations(1) 5406 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5407 } 5408 } 5409 } 5410 } 5411 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,n_div_8)5412 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_div_8) { 5413 TEST_REQUIRES_ARM_NEON; 5414 for (uint32_t n = 16; n <= 24; n += 8) { 5415 for (size_t k = 1; k <= 80; k += 17) { 5416 GemmMicrokernelTester() 5417 .mr(2) 5418 .nr(8) 5419 .kr(4) 5420 .sr(2) 5421 .m(2) 5422 .n(n) 5423 .k(k) 5424 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5425 } 5426 } 5427 } 5428 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,n_div_8_strided_cn)5429 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_div_8_strided_cn) { 5430 TEST_REQUIRES_ARM_NEON; 5431 for (uint32_t n = 16; n <= 24; n += 8) { 5432 for (size_t k = 1; k <= 80; k += 17) { 5433 GemmMicrokernelTester() 5434 .mr(2) 5435 .nr(8) 5436 .kr(4) 5437 .sr(2) 5438 .m(2) 5439 .n(n) 5440 .k(k) 5441 .cn_stride(11) 5442 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5443 
} 5444 } 5445 } 5446 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,n_div_8_subtile)5447 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_div_8_subtile) { 5448 TEST_REQUIRES_ARM_NEON; 5449 for (uint32_t n = 16; n <= 24; n += 8) { 5450 for (size_t k = 1; k <= 80; k += 17) { 5451 for (uint32_t m = 1; m <= 2; m++) { 5452 GemmMicrokernelTester() 5453 .mr(2) 5454 .nr(8) 5455 .kr(4) 5456 .sr(2) 5457 .m(m) 5458 .n(n) 5459 .k(k) 5460 .iterations(1) 5461 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5462 } 5463 } 5464 } 5465 } 5466 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,small_kernel)5467 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, small_kernel) { 5468 TEST_REQUIRES_ARM_NEON; 5469 for (size_t k = 1; k <= 80; k += 17) { 5470 GemmMicrokernelTester() 5471 .mr(2) 5472 .nr(8) 5473 .kr(4) 5474 .sr(2) 5475 .m(2) 5476 .n(8) 5477 .k(k) 5478 .ks(3) 5479 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5480 } 5481 } 5482 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,small_kernel_subtile)5483 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, small_kernel_subtile) { 5484 TEST_REQUIRES_ARM_NEON; 5485 for (size_t k = 1; k <= 80; k += 17) { 5486 for (uint32_t n = 1; n <= 8; n++) { 5487 for (uint32_t m = 1; m <= 2; m++) { 5488 GemmMicrokernelTester() 5489 .mr(2) 5490 .nr(8) 5491 .kr(4) 5492 .sr(2) 5493 .m(m) 5494 .n(n) 5495 .k(k) 5496 .ks(3) 5497 .iterations(1) 5498 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5499 } 5500 } 5501 } 5502 } 5503 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,n_gt_8_small_kernel)5504 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_gt_8_small_kernel) { 5505 TEST_REQUIRES_ARM_NEON; 5506 for (uint32_t n = 9; n < 16; n++) { 5507 for (size_t k = 1; k <= 80; k += 17) { 5508 GemmMicrokernelTester() 5509 
.mr(2) 5510 .nr(8) 5511 .kr(4) 5512 .sr(2) 5513 .m(2) 5514 .n(n) 5515 .k(k) 5516 .ks(3) 5517 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5518 } 5519 } 5520 } 5521 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,n_div_8_small_kernel)5522 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, n_div_8_small_kernel) { 5523 TEST_REQUIRES_ARM_NEON; 5524 for (uint32_t n = 16; n <= 24; n += 8) { 5525 for (size_t k = 1; k <= 80; k += 17) { 5526 GemmMicrokernelTester() 5527 .mr(2) 5528 .nr(8) 5529 .kr(4) 5530 .sr(2) 5531 .m(2) 5532 .n(n) 5533 .k(k) 5534 .ks(3) 5535 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5536 } 5537 } 5538 } 5539 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,strided_cm_subtile)5540 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, strided_cm_subtile) { 5541 TEST_REQUIRES_ARM_NEON; 5542 for (size_t k = 1; k <= 80; k += 17) { 5543 for (uint32_t n = 1; n <= 8; n++) { 5544 for (uint32_t m = 1; m <= 2; m++) { 5545 GemmMicrokernelTester() 5546 .mr(2) 5547 .nr(8) 5548 .kr(4) 5549 .sr(2) 5550 .m(m) 5551 .n(n) 5552 .k(k) 5553 .cm_stride(11) 5554 .iterations(1) 5555 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5556 } 5557 } 5558 } 5559 } 5560 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,a_offset)5561 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, a_offset) { 5562 TEST_REQUIRES_ARM_NEON; 5563 for (size_t k = 1; k <= 80; k += 17) { 5564 GemmMicrokernelTester() 5565 .mr(2) 5566 .nr(8) 5567 .kr(4) 5568 .sr(2) 5569 .m(2) 5570 .n(8) 5571 .k(k) 5572 .ks(3) 5573 .a_offset(163) 5574 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5575 } 5576 } 5577 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,zero)5578 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, zero) { 5579 TEST_REQUIRES_ARM_NEON; 5580 for (size_t k = 1; k <= 80; k += 17) { 5581 for (uint32_t mz = 0; mz < 2; mz++) { 5582 GemmMicrokernelTester() 5583 .mr(2) 5584 .nr(8) 5585 .kr(4) 5586 .sr(2) 5587 .m(2) 5588 .n(8) 5589 .k(k) 5590 .ks(3) 5591 .a_offset(163) 5592 .zero_index(mz) 5593 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5594 } 5595 } 5596 } 5597 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,qmin)5598 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, qmin) { 5599 TEST_REQUIRES_ARM_NEON; 5600 GemmMicrokernelTester() 5601 .mr(2) 5602 .nr(8) 5603 .kr(4) 5604 .sr(2) 5605 .m(2) 5606 .n(8) 5607 .k(16) 5608 .qmin(128) 5609 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5610 } 5611 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,qmax)5612 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, qmax) { 5613 TEST_REQUIRES_ARM_NEON; 5614 GemmMicrokernelTester() 5615 .mr(2) 5616 .nr(8) 5617 .kr(4) 5618 .sr(2) 5619 .m(2) 5620 .n(8) 5621 .k(16) 5622 .qmax(128) 5623 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5624 } 5625 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL,strided_cm)5626 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4S2__NEON_MLAL, strided_cm) { 5627 TEST_REQUIRES_ARM_NEON; 5628 GemmMicrokernelTester() 5629 .mr(2) 5630 .nr(8) 5631 .kr(4) 5632 .sr(2) 5633 .m(2) 5634 .n(8) 5635 .k(16) 5636 .cm_stride(11) 5637 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5638 } 5639 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5640 5641 5642 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16)5643 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16) { 5644 
TEST_REQUIRES_ARM_NEON; 5645 GemmMicrokernelTester() 5646 .mr(1) 5647 .nr(16) 5648 .kr(4) 5649 .sr(2) 5650 .m(1) 5651 .n(16) 5652 .k(16) 5653 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5654 } 5655 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,strided_cn)5656 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cn) { 5657 TEST_REQUIRES_ARM_NEON; 5658 GemmMicrokernelTester() 5659 .mr(1) 5660 .nr(16) 5661 .kr(4) 5662 .sr(2) 5663 .m(1) 5664 .n(16) 5665 .k(16) 5666 .cn_stride(19) 5667 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5668 } 5669 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_subtile)5670 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile) { 5671 TEST_REQUIRES_ARM_NEON; 5672 for (uint32_t n = 1; n <= 16; n++) { 5673 for (uint32_t m = 1; m <= 1; m++) { 5674 GemmMicrokernelTester() 5675 .mr(1) 5676 .nr(16) 5677 .kr(4) 5678 .sr(2) 5679 .m(m) 5680 .n(n) 5681 .k(16) 5682 .iterations(1) 5683 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5684 } 5685 } 5686 } 5687 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_subtile_m)5688 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile_m) { 5689 TEST_REQUIRES_ARM_NEON; 5690 for (uint32_t m = 1; m <= 1; m++) { 5691 GemmMicrokernelTester() 5692 .mr(1) 5693 .nr(16) 5694 .kr(4) 5695 .sr(2) 5696 .m(m) 5697 .n(16) 5698 .k(16) 5699 .iterations(1) 5700 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5701 } 5702 } 5703 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_subtile_n)5704 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile_n) { 5705 TEST_REQUIRES_ARM_NEON; 5706 for (uint32_t n = 1; n <= 16; n++) { 5707 
GemmMicrokernelTester() 5708 .mr(1) 5709 .nr(16) 5710 .kr(4) 5711 .sr(2) 5712 .m(1) 5713 .n(n) 5714 .k(16) 5715 .iterations(1) 5716 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5717 } 5718 } 5719 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_lt_16)5720 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16) { 5721 TEST_REQUIRES_ARM_NEON; 5722 for (size_t k = 1; k < 16; k++) { 5723 GemmMicrokernelTester() 5724 .mr(1) 5725 .nr(16) 5726 .kr(4) 5727 .sr(2) 5728 .m(1) 5729 .n(16) 5730 .k(k) 5731 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5732 } 5733 } 5734 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_lt_16_subtile)5735 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16_subtile) { 5736 TEST_REQUIRES_ARM_NEON; 5737 for (size_t k = 1; k < 16; k++) { 5738 for (uint32_t n = 1; n <= 16; n++) { 5739 for (uint32_t m = 1; m <= 1; m++) { 5740 GemmMicrokernelTester() 5741 .mr(1) 5742 .nr(16) 5743 .kr(4) 5744 .sr(2) 5745 .m(m) 5746 .n(n) 5747 .k(k) 5748 .iterations(1) 5749 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5750 } 5751 } 5752 } 5753 } 5754 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_gt_16)5755 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16) { 5756 TEST_REQUIRES_ARM_NEON; 5757 for (size_t k = 17; k < 32; k++) { 5758 GemmMicrokernelTester() 5759 .mr(1) 5760 .nr(16) 5761 .kr(4) 5762 .sr(2) 5763 .m(1) 5764 .n(16) 5765 .k(k) 5766 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5767 } 5768 } 5769 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_gt_16_subtile)5770 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16_subtile) { 5771 TEST_REQUIRES_ARM_NEON; 5772 for (size_t k = 17; k < 32; k++) { 
5773 for (uint32_t n = 1; n <= 16; n++) { 5774 for (uint32_t m = 1; m <= 1; m++) { 5775 GemmMicrokernelTester() 5776 .mr(1) 5777 .nr(16) 5778 .kr(4) 5779 .sr(2) 5780 .m(m) 5781 .n(n) 5782 .k(k) 5783 .iterations(1) 5784 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5785 } 5786 } 5787 } 5788 } 5789 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_div_16)5790 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16) { 5791 TEST_REQUIRES_ARM_NEON; 5792 for (size_t k = 32; k <= 160; k += 16) { 5793 GemmMicrokernelTester() 5794 .mr(1) 5795 .nr(16) 5796 .kr(4) 5797 .sr(2) 5798 .m(1) 5799 .n(16) 5800 .k(k) 5801 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5802 } 5803 } 5804 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_div_16_subtile)5805 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16_subtile) { 5806 TEST_REQUIRES_ARM_NEON; 5807 for (size_t k = 32; k <= 160; k += 16) { 5808 for (uint32_t n = 1; n <= 16; n++) { 5809 for (uint32_t m = 1; m <= 1; m++) { 5810 GemmMicrokernelTester() 5811 .mr(1) 5812 .nr(16) 5813 .kr(4) 5814 .sr(2) 5815 .m(m) 5816 .n(n) 5817 .k(k) 5818 .iterations(1) 5819 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5820 } 5821 } 5822 } 5823 } 5824 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_gt_16)5825 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16) { 5826 TEST_REQUIRES_ARM_NEON; 5827 for (uint32_t n = 17; n < 32; n++) { 5828 for (size_t k = 1; k <= 80; k += 17) { 5829 GemmMicrokernelTester() 5830 .mr(1) 5831 .nr(16) 5832 .kr(4) 5833 .sr(2) 5834 .m(1) 5835 .n(n) 5836 .k(k) 5837 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5838 } 5839 } 5840 } 5841 
// 1x16c4s2 NEON MLAL, n_gt_16_strided_cn: m = 1, n in 17..31,
// k takes 1, 18, 35, 52, 69, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// 1x16c4s2 NEON MLAL, n_gt_16_subtile: n in 17..31, k takes 1, 18, 35, 52, 69,
// m in 1..1 (mr is 1), one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// 1x16c4s2 NEON MLAL, n_div_16: m = 1, n = 32 and 48, k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// 1x16c4s2 NEON MLAL, n_div_16_strided_cn: same sweep as n_div_16 plus cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(2)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// 1x16c4s2 NEON MLAL, n_div_16_subtile: n = 32 and 48, k takes 1, 18, 35, 52, 69,
// m in 1..1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// 1x16c4s2 NEON MLAL, small_kernel: m = 1, n = 16, ks(3), k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(2)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// 1x16c4s2 NEON MLAL, small_kernel_subtile: ks(3), k takes 1, 18, 35, 52, 69,
// n in 1..16, m in 1..1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_gt_16_small_kernel)5972 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_small_kernel) { 5973 TEST_REQUIRES_ARM_NEON; 5974 for (uint32_t n = 17; n < 32; n++) { 5975 for (size_t k = 1; k <= 80; k += 17) { 5976 GemmMicrokernelTester() 5977 .mr(1) 5978 .nr(16) 5979 .kr(4) 5980 .sr(2) 5981 .m(1) 5982 .n(n) 5983 .k(k) 5984 .ks(3) 5985 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5986 } 5987 } 5988 } 5989 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_div_16_small_kernel)5990 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_small_kernel) { 5991 TEST_REQUIRES_ARM_NEON; 5992 for (uint32_t n = 32; n <= 48; n += 16) { 5993 for (size_t k = 1; k <= 80; k += 17) { 5994 GemmMicrokernelTester() 5995 .mr(1) 5996 .nr(16) 5997 .kr(4) 5998 .sr(2) 5999 .m(1) 6000 .n(n) 6001 .k(k) 6002 .ks(3) 6003 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6004 } 6005 } 6006 } 6007 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,strided_cm_subtile)6008 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cm_subtile) { 6009 TEST_REQUIRES_ARM_NEON; 6010 for (size_t k = 1; k <= 80; k += 17) { 6011 for (uint32_t n = 1; n <= 16; n++) { 6012 for (uint32_t m = 1; m <= 1; m++) { 6013 GemmMicrokernelTester() 6014 .mr(1) 6015 .nr(16) 6016 .kr(4) 6017 .sr(2) 6018 .m(m) 6019 .n(n) 6020 .k(k) 6021 .cm_stride(19) 6022 .iterations(1) 6023 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6024 } 6025 } 6026 } 6027 } 6028 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,a_offset)6029 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, a_offset) { 6030 TEST_REQUIRES_ARM_NEON; 6031 for (size_t k = 1; k <= 80; k += 17) { 6032 GemmMicrokernelTester() 6033 .mr(1) 6034 .nr(16) 6035 .kr(4) 6036 .sr(2) 
6037 .m(1) 6038 .n(16) 6039 .k(k) 6040 .ks(3) 6041 .a_offset(83) 6042 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6043 } 6044 } 6045 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,zero)6046 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, zero) { 6047 TEST_REQUIRES_ARM_NEON; 6048 for (size_t k = 1; k <= 80; k += 17) { 6049 for (uint32_t mz = 0; mz < 1; mz++) { 6050 GemmMicrokernelTester() 6051 .mr(1) 6052 .nr(16) 6053 .kr(4) 6054 .sr(2) 6055 .m(1) 6056 .n(16) 6057 .k(k) 6058 .ks(3) 6059 .a_offset(83) 6060 .zero_index(mz) 6061 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6062 } 6063 } 6064 } 6065 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,qmin)6066 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, qmin) { 6067 TEST_REQUIRES_ARM_NEON; 6068 GemmMicrokernelTester() 6069 .mr(1) 6070 .nr(16) 6071 .kr(4) 6072 .sr(2) 6073 .m(1) 6074 .n(16) 6075 .k(16) 6076 .qmin(128) 6077 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6078 } 6079 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,qmax)6080 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, qmax) { 6081 TEST_REQUIRES_ARM_NEON; 6082 GemmMicrokernelTester() 6083 .mr(1) 6084 .nr(16) 6085 .kr(4) 6086 .sr(2) 6087 .m(1) 6088 .n(16) 6089 .k(16) 6090 .qmax(128) 6091 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6092 } 6093 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,strided_cm)6094 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cm) { 6095 TEST_REQUIRES_ARM_NEON; 6096 GemmMicrokernelTester() 6097 .mr(1) 6098 .nr(16) 6099 .kr(4) 6100 .sr(2) 6101 .m(1) 6102 .n(16) 6103 .k(16) 6104 .cm_stride(19) 6105 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6106 } 6107 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 6108 6109 6110 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,k_eq_8)6111 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, k_eq_8) { 6112 TEST_REQUIRES_ARM_NEON; 6113 GemmMicrokernelTester() 6114 .mr(2) 6115 .nr(8) 6116 .kr(2) 6117 .sr(4) 6118 .m(2) 6119 .n(8) 6120 .k(8) 6121 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6122 } 6123 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,strided_cn)6124 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, strided_cn) { 6125 TEST_REQUIRES_ARM_NEON; 6126 GemmMicrokernelTester() 6127 .mr(2) 6128 .nr(8) 6129 .kr(2) 6130 .sr(4) 6131 .m(2) 6132 .n(8) 6133 .k(8) 6134 .cn_stride(11) 6135 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6136 } 6137 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,k_eq_8_subtile)6138 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, k_eq_8_subtile) { 6139 TEST_REQUIRES_ARM_NEON; 6140 for (uint32_t n = 1; n <= 8; n++) { 6141 for (uint32_t m = 1; m <= 2; m++) { 6142 GemmMicrokernelTester() 6143 .mr(2) 6144 .nr(8) 6145 .kr(2) 6146 .sr(4) 6147 .m(m) 6148 .n(n) 6149 .k(8) 6150 .iterations(1) 6151 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6152 } 6153 } 6154 } 6155 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,k_eq_8_subtile_m)6156 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, k_eq_8_subtile_m) { 6157 TEST_REQUIRES_ARM_NEON; 6158 for (uint32_t m = 1; m <= 2; m++) { 6159 GemmMicrokernelTester() 6160 .mr(2) 6161 .nr(8) 6162 .kr(2) 6163 .sr(4) 6164 .m(m) 6165 .n(8) 6166 .k(8) 6167 .iterations(1) 6168 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 6169 } 6170 } 6171 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,k_eq_8_subtile_n)6172 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, k_eq_8_subtile_n) { 6173 TEST_REQUIRES_ARM_NEON; 6174 for (uint32_t n = 1; n <= 8; n++) { 6175 GemmMicrokernelTester() 6176 .mr(2) 6177 .nr(8) 6178 .kr(2) 6179 .sr(4) 6180 .m(2) 6181 .n(n) 6182 .k(8) 6183 .iterations(1) 6184 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6185 } 6186 } 6187 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,k_lt_8)6188 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, k_lt_8) { 6189 TEST_REQUIRES_ARM_NEON; 6190 for (size_t k = 1; k < 8; k++) { 6191 GemmMicrokernelTester() 6192 .mr(2) 6193 .nr(8) 6194 .kr(2) 6195 .sr(4) 6196 .m(2) 6197 .n(8) 6198 .k(k) 6199 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6200 } 6201 } 6202 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,k_lt_8_subtile)6203 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, k_lt_8_subtile) { 6204 TEST_REQUIRES_ARM_NEON; 6205 for (size_t k = 1; k < 8; k++) { 6206 for (uint32_t n = 1; n <= 8; n++) { 6207 for (uint32_t m = 1; m <= 2; m++) { 6208 GemmMicrokernelTester() 6209 .mr(2) 6210 .nr(8) 6211 .kr(2) 6212 .sr(4) 6213 .m(m) 6214 .n(n) 6215 .k(k) 6216 .iterations(1) 6217 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6218 } 6219 } 6220 } 6221 } 6222 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,k_gt_8)6223 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, k_gt_8) { 6224 TEST_REQUIRES_ARM_NEON; 6225 for (size_t k = 9; k < 16; k++) { 6226 GemmMicrokernelTester() 6227 .mr(2) 6228 .nr(8) 6229 .kr(2) 6230 .sr(4) 6231 .m(2) 6232 .n(8) 6233 .k(k) 6234 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 6235 } 6236 } 6237 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,k_gt_8_subtile)6238 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, k_gt_8_subtile) { 6239 TEST_REQUIRES_ARM_NEON; 6240 for (size_t k = 9; k < 16; k++) { 6241 for (uint32_t n = 1; n <= 8; n++) { 6242 for (uint32_t m = 1; m <= 2; m++) { 6243 GemmMicrokernelTester() 6244 .mr(2) 6245 .nr(8) 6246 .kr(2) 6247 .sr(4) 6248 .m(m) 6249 .n(n) 6250 .k(k) 6251 .iterations(1) 6252 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6253 } 6254 } 6255 } 6256 } 6257 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,k_div_8)6258 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, k_div_8) { 6259 TEST_REQUIRES_ARM_NEON; 6260 for (size_t k = 16; k <= 80; k += 8) { 6261 GemmMicrokernelTester() 6262 .mr(2) 6263 .nr(8) 6264 .kr(2) 6265 .sr(4) 6266 .m(2) 6267 .n(8) 6268 .k(k) 6269 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6270 } 6271 } 6272 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,k_div_8_subtile)6273 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, k_div_8_subtile) { 6274 TEST_REQUIRES_ARM_NEON; 6275 for (size_t k = 16; k <= 80; k += 8) { 6276 for (uint32_t n = 1; n <= 8; n++) { 6277 for (uint32_t m = 1; m <= 2; m++) { 6278 GemmMicrokernelTester() 6279 .mr(2) 6280 .nr(8) 6281 .kr(2) 6282 .sr(4) 6283 .m(m) 6284 .n(n) 6285 .k(k) 6286 .iterations(1) 6287 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6288 } 6289 } 6290 } 6291 } 6292 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,n_gt_8)6293 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, n_gt_8) { 6294 TEST_REQUIRES_ARM_NEON; 6295 for (uint32_t n = 9; n < 16; n++) { 6296 for (size_t k = 1; k <= 40; k += 9) { 6297 GemmMicrokernelTester() 6298 .mr(2) 6299 .nr(8) 6300 .kr(2) 6301 .sr(4) 6302 .m(2) 6303 
.n(n) 6304 .k(k) 6305 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6306 } 6307 } 6308 } 6309 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,n_gt_8_strided_cn)6310 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, n_gt_8_strided_cn) { 6311 TEST_REQUIRES_ARM_NEON; 6312 for (uint32_t n = 9; n < 16; n++) { 6313 for (size_t k = 1; k <= 40; k += 9) { 6314 GemmMicrokernelTester() 6315 .mr(2) 6316 .nr(8) 6317 .kr(2) 6318 .sr(4) 6319 .m(2) 6320 .n(n) 6321 .k(k) 6322 .cn_stride(11) 6323 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6324 } 6325 } 6326 } 6327 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,n_gt_8_subtile)6328 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, n_gt_8_subtile) { 6329 TEST_REQUIRES_ARM_NEON; 6330 for (uint32_t n = 9; n < 16; n++) { 6331 for (size_t k = 1; k <= 40; k += 9) { 6332 for (uint32_t m = 1; m <= 2; m++) { 6333 GemmMicrokernelTester() 6334 .mr(2) 6335 .nr(8) 6336 .kr(2) 6337 .sr(4) 6338 .m(m) 6339 .n(n) 6340 .k(k) 6341 .iterations(1) 6342 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6343 } 6344 } 6345 } 6346 } 6347 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,n_div_8)6348 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, n_div_8) { 6349 TEST_REQUIRES_ARM_NEON; 6350 for (uint32_t n = 16; n <= 24; n += 8) { 6351 for (size_t k = 1; k <= 40; k += 9) { 6352 GemmMicrokernelTester() 6353 .mr(2) 6354 .nr(8) 6355 .kr(2) 6356 .sr(4) 6357 .m(2) 6358 .n(n) 6359 .k(k) 6360 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6361 } 6362 } 6363 } 6364 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,n_div_8_strided_cn)6365 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, n_div_8_strided_cn) { 6366 TEST_REQUIRES_ARM_NEON; 6367 
for (uint32_t n = 16; n <= 24; n += 8) { 6368 for (size_t k = 1; k <= 40; k += 9) { 6369 GemmMicrokernelTester() 6370 .mr(2) 6371 .nr(8) 6372 .kr(2) 6373 .sr(4) 6374 .m(2) 6375 .n(n) 6376 .k(k) 6377 .cn_stride(11) 6378 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6379 } 6380 } 6381 } 6382 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,n_div_8_subtile)6383 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, n_div_8_subtile) { 6384 TEST_REQUIRES_ARM_NEON; 6385 for (uint32_t n = 16; n <= 24; n += 8) { 6386 for (size_t k = 1; k <= 40; k += 9) { 6387 for (uint32_t m = 1; m <= 2; m++) { 6388 GemmMicrokernelTester() 6389 .mr(2) 6390 .nr(8) 6391 .kr(2) 6392 .sr(4) 6393 .m(m) 6394 .n(n) 6395 .k(k) 6396 .iterations(1) 6397 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6398 } 6399 } 6400 } 6401 } 6402 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,small_kernel)6403 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, small_kernel) { 6404 TEST_REQUIRES_ARM_NEON; 6405 for (size_t k = 1; k <= 40; k += 9) { 6406 GemmMicrokernelTester() 6407 .mr(2) 6408 .nr(8) 6409 .kr(2) 6410 .sr(4) 6411 .m(2) 6412 .n(8) 6413 .k(k) 6414 .ks(3) 6415 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6416 } 6417 } 6418 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,small_kernel_subtile)6419 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, small_kernel_subtile) { 6420 TEST_REQUIRES_ARM_NEON; 6421 for (size_t k = 1; k <= 40; k += 9) { 6422 for (uint32_t n = 1; n <= 8; n++) { 6423 for (uint32_t m = 1; m <= 2; m++) { 6424 GemmMicrokernelTester() 6425 .mr(2) 6426 .nr(8) 6427 .kr(2) 6428 .sr(4) 6429 .m(m) 6430 .n(n) 6431 .k(k) 6432 .ks(3) 6433 .iterations(1) 6434 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6435 } 6436 } 6437 } 6438 } 6439 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,n_gt_8_small_kernel)6440 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, n_gt_8_small_kernel) { 6441 TEST_REQUIRES_ARM_NEON; 6442 for (uint32_t n = 9; n < 16; n++) { 6443 for (size_t k = 1; k <= 40; k += 9) { 6444 GemmMicrokernelTester() 6445 .mr(2) 6446 .nr(8) 6447 .kr(2) 6448 .sr(4) 6449 .m(2) 6450 .n(n) 6451 .k(k) 6452 .ks(3) 6453 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6454 } 6455 } 6456 } 6457 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,n_div_8_small_kernel)6458 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, n_div_8_small_kernel) { 6459 TEST_REQUIRES_ARM_NEON; 6460 for (uint32_t n = 16; n <= 24; n += 8) { 6461 for (size_t k = 1; k <= 40; k += 9) { 6462 GemmMicrokernelTester() 6463 .mr(2) 6464 .nr(8) 6465 .kr(2) 6466 .sr(4) 6467 .m(2) 6468 .n(n) 6469 .k(k) 6470 .ks(3) 6471 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6472 } 6473 } 6474 } 6475 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,strided_cm_subtile)6476 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, strided_cm_subtile) { 6477 TEST_REQUIRES_ARM_NEON; 6478 for (size_t k = 1; k <= 40; k += 9) { 6479 for (uint32_t n = 1; n <= 8; n++) { 6480 for (uint32_t m = 1; m <= 2; m++) { 6481 GemmMicrokernelTester() 6482 .mr(2) 6483 .nr(8) 6484 .kr(2) 6485 .sr(4) 6486 .m(m) 6487 .n(n) 6488 .k(k) 6489 .cm_stride(11) 6490 .iterations(1) 6491 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6492 } 6493 } 6494 } 6495 } 6496 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,a_offset)6497 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, a_offset) { 6498 TEST_REQUIRES_ARM_NEON; 6499 for (size_t k = 1; k <= 40; k += 9) 
{ 6500 GemmMicrokernelTester() 6501 .mr(2) 6502 .nr(8) 6503 .kr(2) 6504 .sr(4) 6505 .m(2) 6506 .n(8) 6507 .k(k) 6508 .ks(3) 6509 .a_offset(83) 6510 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6511 } 6512 } 6513 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,zero)6514 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, zero) { 6515 TEST_REQUIRES_ARM_NEON; 6516 for (size_t k = 1; k <= 40; k += 9) { 6517 for (uint32_t mz = 0; mz < 2; mz++) { 6518 GemmMicrokernelTester() 6519 .mr(2) 6520 .nr(8) 6521 .kr(2) 6522 .sr(4) 6523 .m(2) 6524 .n(8) 6525 .k(k) 6526 .ks(3) 6527 .a_offset(83) 6528 .zero_index(mz) 6529 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6530 } 6531 } 6532 } 6533 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,qmin)6534 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, qmin) { 6535 TEST_REQUIRES_ARM_NEON; 6536 GemmMicrokernelTester() 6537 .mr(2) 6538 .nr(8) 6539 .kr(2) 6540 .sr(4) 6541 .m(2) 6542 .n(8) 6543 .k(8) 6544 .qmin(128) 6545 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6546 } 6547 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,qmax)6548 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, qmax) { 6549 TEST_REQUIRES_ARM_NEON; 6550 GemmMicrokernelTester() 6551 .mr(2) 6552 .nr(8) 6553 .kr(2) 6554 .sr(4) 6555 .m(2) 6556 .n(8) 6557 .k(8) 6558 .qmax(128) 6559 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6560 } 6561 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL,strided_cm)6562 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2S4__NEON_MULL, strided_cm) { 6563 TEST_REQUIRES_ARM_NEON; 6564 GemmMicrokernelTester() 6565 .mr(2) 6566 .nr(8) 6567 .kr(2) 6568 .sr(4) 6569 .m(2) 6570 .n(8) 6571 .k(8) 6572 .cm_stride(11) 6573 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6574 } 6575 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 6576 6577 6578 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,k_eq_8)6579 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, k_eq_8) { 6580 TEST_REQUIRES_ARM_NEON; 6581 GemmMicrokernelTester() 6582 .mr(1) 6583 .nr(16) 6584 .kr(2) 6585 .sr(4) 6586 .m(1) 6587 .n(16) 6588 .k(8) 6589 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6590 } 6591 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,strided_cn)6592 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, strided_cn) { 6593 TEST_REQUIRES_ARM_NEON; 6594 GemmMicrokernelTester() 6595 .mr(1) 6596 .nr(16) 6597 .kr(2) 6598 .sr(4) 6599 .m(1) 6600 .n(16) 6601 .k(8) 6602 .cn_stride(19) 6603 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6604 } 6605 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,k_eq_8_subtile)6606 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, k_eq_8_subtile) { 6607 TEST_REQUIRES_ARM_NEON; 6608 for (uint32_t n = 1; n <= 16; n++) { 6609 for (uint32_t m = 1; m <= 1; m++) { 6610 GemmMicrokernelTester() 6611 .mr(1) 6612 .nr(16) 6613 .kr(2) 6614 .sr(4) 6615 .m(m) 6616 .n(n) 6617 .k(8) 6618 .iterations(1) 6619 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6620 } 6621 } 6622 } 6623 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,k_eq_8_subtile_m)6624 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, k_eq_8_subtile_m) { 6625 TEST_REQUIRES_ARM_NEON; 6626 for (uint32_t m = 1; m <= 1; m++) { 6627 GemmMicrokernelTester() 6628 .mr(1) 6629 .nr(16) 6630 .kr(2) 6631 .sr(4) 6632 .m(m) 6633 .n(16) 6634 .k(8) 6635 .iterations(1) 6636 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6637 } 6638 } 6639 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,k_eq_8_subtile_n)6640 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, k_eq_8_subtile_n) { 6641 TEST_REQUIRES_ARM_NEON; 6642 for (uint32_t n = 1; n <= 16; n++) { 6643 GemmMicrokernelTester() 6644 .mr(1) 6645 .nr(16) 6646 .kr(2) 6647 .sr(4) 6648 .m(1) 6649 .n(n) 6650 .k(8) 6651 .iterations(1) 6652 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6653 } 6654 } 6655 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,k_lt_8)6656 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, k_lt_8) { 6657 TEST_REQUIRES_ARM_NEON; 6658 for (size_t k = 1; k < 8; k++) { 6659 GemmMicrokernelTester() 6660 .mr(1) 6661 .nr(16) 6662 .kr(2) 6663 .sr(4) 6664 .m(1) 6665 .n(16) 6666 .k(k) 6667 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6668 } 6669 } 6670 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,k_lt_8_subtile)6671 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, k_lt_8_subtile) { 6672 TEST_REQUIRES_ARM_NEON; 6673 for (size_t k = 1; k < 8; k++) { 6674 for (uint32_t n = 1; n <= 16; n++) { 6675 for (uint32_t m = 1; m <= 1; m++) { 6676 GemmMicrokernelTester() 6677 .mr(1) 6678 .nr(16) 6679 .kr(2) 6680 .sr(4) 6681 .m(m) 6682 .n(n) 6683 .k(k) 6684 .iterations(1) 6685 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6686 } 6687 } 6688 } 6689 } 6690 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,k_gt_8)6691 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, k_gt_8) { 6692 TEST_REQUIRES_ARM_NEON; 6693 for (size_t k = 9; k < 16; k++) { 6694 GemmMicrokernelTester() 6695 .mr(1) 6696 .nr(16) 6697 .kr(2) 6698 .sr(4) 6699 .m(1) 6700 .n(16) 6701 .k(k) 6702 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6703 } 6704 } 6705 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,k_gt_8_subtile)6706 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, k_gt_8_subtile) { 6707 TEST_REQUIRES_ARM_NEON; 6708 for (size_t k = 9; k < 16; k++) { 6709 for (uint32_t n = 1; n <= 16; n++) { 6710 for (uint32_t m = 1; m <= 1; m++) { 6711 GemmMicrokernelTester() 6712 .mr(1) 6713 .nr(16) 6714 .kr(2) 6715 .sr(4) 6716 .m(m) 6717 .n(n) 6718 .k(k) 6719 .iterations(1) 6720 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6721 } 6722 } 6723 } 6724 } 6725 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,k_div_8)6726 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, k_div_8) { 6727 TEST_REQUIRES_ARM_NEON; 6728 for (size_t k = 16; k <= 80; k += 8) { 6729 GemmMicrokernelTester() 6730 .mr(1) 6731 .nr(16) 6732 .kr(2) 6733 .sr(4) 6734 .m(1) 6735 .n(16) 6736 .k(k) 6737 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6738 } 6739 } 6740 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,k_div_8_subtile)6741 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, k_div_8_subtile) { 6742 TEST_REQUIRES_ARM_NEON; 6743 for (size_t k = 16; k <= 80; k += 8) { 6744 for (uint32_t n = 1; n <= 16; n++) { 6745 for (uint32_t m = 1; m <= 1; m++) { 6746 GemmMicrokernelTester() 6747 .mr(1) 6748 .nr(16) 6749 .kr(2) 6750 .sr(4) 6751 .m(m) 6752 .n(n) 6753 .k(k) 6754 .iterations(1) 6755 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6756 } 6757 } 6758 } 6759 } 6760 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,n_gt_16)6761 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, n_gt_16) { 6762 TEST_REQUIRES_ARM_NEON; 6763 for (uint32_t n = 17; n < 32; n++) { 6764 for 
(size_t k = 1; k <= 40; k += 9) { 6765 GemmMicrokernelTester() 6766 .mr(1) 6767 .nr(16) 6768 .kr(2) 6769 .sr(4) 6770 .m(1) 6771 .n(n) 6772 .k(k) 6773 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6774 } 6775 } 6776 } 6777 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,n_gt_16_strided_cn)6778 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, n_gt_16_strided_cn) { 6779 TEST_REQUIRES_ARM_NEON; 6780 for (uint32_t n = 17; n < 32; n++) { 6781 for (size_t k = 1; k <= 40; k += 9) { 6782 GemmMicrokernelTester() 6783 .mr(1) 6784 .nr(16) 6785 .kr(2) 6786 .sr(4) 6787 .m(1) 6788 .n(n) 6789 .k(k) 6790 .cn_stride(19) 6791 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6792 } 6793 } 6794 } 6795 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,n_gt_16_subtile)6796 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, n_gt_16_subtile) { 6797 TEST_REQUIRES_ARM_NEON; 6798 for (uint32_t n = 17; n < 32; n++) { 6799 for (size_t k = 1; k <= 40; k += 9) { 6800 for (uint32_t m = 1; m <= 1; m++) { 6801 GemmMicrokernelTester() 6802 .mr(1) 6803 .nr(16) 6804 .kr(2) 6805 .sr(4) 6806 .m(m) 6807 .n(n) 6808 .k(k) 6809 .iterations(1) 6810 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6811 } 6812 } 6813 } 6814 } 6815 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL,n_div_16)6816 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, n_div_16) { 6817 TEST_REQUIRES_ARM_NEON; 6818 for (uint32_t n = 32; n <= 48; n += 16) { 6819 for (size_t k = 1; k <= 40; k += 9) { 6820 GemmMicrokernelTester() 6821 .mr(1) 6822 .nr(16) 6823 .kr(2) 6824 .sr(4) 6825 .m(1) 6826 .n(n) 6827 .k(k) 6828 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6829 } 6830 } 6831 } 6832 
// Further cases for xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull
// (mr = 1, nr = 16, kr = 2, sr = 4). In every sweep below k takes the values 1, 10, 19, 28, 37.

// n_div_16_strided_cn: m = 1, n = 32 and 48, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(4)
        .m(1).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_16_subtile: n = 32 and 48 crossed with m in [1, 1]; iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(4)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// small_kernel: m = 1, n = 16, with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(4)
      .m(1).n(16).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// small_kernel_subtile: ks(3) for every n in [1, 16] and m in [1, 1]; iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(4)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_gt_16_small_kernel: m = 1, n in [17, 31], with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(4)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_16_small_kernel: m = 1, n = 32 and 48, with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(4)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// strided_cm_subtile: cm_stride(19) for every n in [1, 16] and m in [1, 1]; iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(2).sr(4)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Further cases for xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull
// (mr = 1, nr = 16, kr = 2, sr = 4), all at the full m = 1, n = 16 shape.

// a_offset: k = 1, 10, 19, 28, 37 with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(2).sr(4)
      .m(1).n(16).k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// zero: the a_offset configuration plus zero_index(mz); the mz loop covers only mz = 0.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(2).sr(4)
        .m(1).n(16).k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// qmin: k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(2).sr(4)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// qmax: k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(2).sr(4)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2S4__NEON_MULL, strided_cm) { 7031 TEST_REQUIRES_ARM_NEON; 7032 GemmMicrokernelTester() 7033 .mr(1) 7034 .nr(16) 7035 .kr(2) 7036 .sr(4) 7037 .m(1) 7038 .n(16) 7039 .k(8) 7040 .cm_stride(19) 7041 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7042 } 7043 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7044 7045 7046 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16)7047 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16) { 7048 TEST_REQUIRES_ARM_NEON; 7049 GemmMicrokernelTester() 7050 .mr(1) 7051 .nr(8) 7052 .kr(2) 7053 .sr(4) 7054 .m(1) 7055 .n(8) 7056 .k(16) 7057 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7058 } 7059 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,strided_cn)7060 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, strided_cn) { 7061 TEST_REQUIRES_ARM_NEON; 7062 GemmMicrokernelTester() 7063 .mr(1) 7064 .nr(8) 7065 .kr(2) 7066 .sr(4) 7067 .m(1) 7068 .n(8) 7069 .k(16) 7070 .cn_stride(11) 7071 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7072 } 7073 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_subtile)7074 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_subtile) { 7075 TEST_REQUIRES_ARM_NEON; 7076 for (uint32_t n = 1; n <= 8; n++) { 7077 for (uint32_t m = 1; m <= 1; m++) { 7078 GemmMicrokernelTester() 7079 .mr(1) 7080 .nr(8) 7081 .kr(2) 7082 .sr(4) 7083 .m(m) 7084 .n(n) 7085 .k(16) 7086 .iterations(1) 7087 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7088 } 7089 } 7090 } 7091 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_subtile_m)7092 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_subtile_m) { 7093 
TEST_REQUIRES_ARM_NEON; 7094 for (uint32_t m = 1; m <= 1; m++) { 7095 GemmMicrokernelTester() 7096 .mr(1) 7097 .nr(8) 7098 .kr(2) 7099 .sr(4) 7100 .m(m) 7101 .n(8) 7102 .k(16) 7103 .iterations(1) 7104 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7105 } 7106 } 7107 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_subtile_n)7108 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_subtile_n) { 7109 TEST_REQUIRES_ARM_NEON; 7110 for (uint32_t n = 1; n <= 8; n++) { 7111 GemmMicrokernelTester() 7112 .mr(1) 7113 .nr(8) 7114 .kr(2) 7115 .sr(4) 7116 .m(1) 7117 .n(n) 7118 .k(16) 7119 .iterations(1) 7120 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7121 } 7122 } 7123 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_lt_16)7124 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_lt_16) { 7125 TEST_REQUIRES_ARM_NEON; 7126 for (size_t k = 1; k < 16; k++) { 7127 GemmMicrokernelTester() 7128 .mr(1) 7129 .nr(8) 7130 .kr(2) 7131 .sr(4) 7132 .m(1) 7133 .n(8) 7134 .k(k) 7135 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7136 } 7137 } 7138 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_lt_16_subtile)7139 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_lt_16_subtile) { 7140 TEST_REQUIRES_ARM_NEON; 7141 for (size_t k = 1; k < 16; k++) { 7142 for (uint32_t n = 1; n <= 8; n++) { 7143 for (uint32_t m = 1; m <= 1; m++) { 7144 GemmMicrokernelTester() 7145 .mr(1) 7146 .nr(8) 7147 .kr(2) 7148 .sr(4) 7149 .m(m) 7150 .n(n) 7151 .k(k) 7152 .iterations(1) 7153 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7154 } 7155 } 7156 } 7157 } 7158 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_gt_16)7159 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, 
k_gt_16) { 7160 TEST_REQUIRES_ARM_NEON; 7161 for (size_t k = 17; k < 32; k++) { 7162 GemmMicrokernelTester() 7163 .mr(1) 7164 .nr(8) 7165 .kr(2) 7166 .sr(4) 7167 .m(1) 7168 .n(8) 7169 .k(k) 7170 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7171 } 7172 } 7173 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_gt_16_subtile)7174 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_gt_16_subtile) { 7175 TEST_REQUIRES_ARM_NEON; 7176 for (size_t k = 17; k < 32; k++) { 7177 for (uint32_t n = 1; n <= 8; n++) { 7178 for (uint32_t m = 1; m <= 1; m++) { 7179 GemmMicrokernelTester() 7180 .mr(1) 7181 .nr(8) 7182 .kr(2) 7183 .sr(4) 7184 .m(m) 7185 .n(n) 7186 .k(k) 7187 .iterations(1) 7188 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7189 } 7190 } 7191 } 7192 } 7193 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_div_16)7194 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_div_16) { 7195 TEST_REQUIRES_ARM_NEON; 7196 for (size_t k = 32; k <= 160; k += 16) { 7197 GemmMicrokernelTester() 7198 .mr(1) 7199 .nr(8) 7200 .kr(2) 7201 .sr(4) 7202 .m(1) 7203 .n(8) 7204 .k(k) 7205 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7206 } 7207 } 7208 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_div_16_subtile)7209 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_div_16_subtile) { 7210 TEST_REQUIRES_ARM_NEON; 7211 for (size_t k = 32; k <= 160; k += 16) { 7212 for (uint32_t n = 1; n <= 8; n++) { 7213 for (uint32_t m = 1; m <= 1; m++) { 7214 GemmMicrokernelTester() 7215 .mr(1) 7216 .nr(8) 7217 .kr(2) 7218 .sr(4) 7219 .m(m) 7220 .n(n) 7221 .k(k) 7222 .iterations(1) 7223 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7224 } 7225 } 7226 } 7227 } 7228 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8)7229 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8) { 7230 TEST_REQUIRES_ARM_NEON; 7231 for (uint32_t n = 9; n < 16; n++) { 7232 for (size_t k = 1; k <= 80; k += 17) { 7233 GemmMicrokernelTester() 7234 .mr(1) 7235 .nr(8) 7236 .kr(2) 7237 .sr(4) 7238 .m(1) 7239 .n(n) 7240 .k(k) 7241 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7242 } 7243 } 7244 } 7245 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8_strided_cn)7246 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8_strided_cn) { 7247 TEST_REQUIRES_ARM_NEON; 7248 for (uint32_t n = 9; n < 16; n++) { 7249 for (size_t k = 1; k <= 80; k += 17) { 7250 GemmMicrokernelTester() 7251 .mr(1) 7252 .nr(8) 7253 .kr(2) 7254 .sr(4) 7255 .m(1) 7256 .n(n) 7257 .k(k) 7258 .cn_stride(11) 7259 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7260 } 7261 } 7262 } 7263 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8_subtile)7264 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8_subtile) { 7265 TEST_REQUIRES_ARM_NEON; 7266 for (uint32_t n = 9; n < 16; n++) { 7267 for (size_t k = 1; k <= 80; k += 17) { 7268 for (uint32_t m = 1; m <= 1; m++) { 7269 GemmMicrokernelTester() 7270 .mr(1) 7271 .nr(8) 7272 .kr(2) 7273 .sr(4) 7274 .m(m) 7275 .n(n) 7276 .k(k) 7277 .iterations(1) 7278 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7279 } 7280 } 7281 } 7282 } 7283 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8)7284 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8) { 7285 TEST_REQUIRES_ARM_NEON; 7286 for (uint32_t n = 16; n <= 24; n += 8) { 7287 for (size_t k = 1; k <= 80; k += 17) { 7288 GemmMicrokernelTester() 7289 .mr(1) 7290 .nr(8) 7291 .kr(2) 7292 .sr(4) 7293 .m(1) 7294 .n(n) 7295 .k(k) 7296 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7297 } 7298 } 7299 } 7300 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8_strided_cn)7301 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8_strided_cn) { 7302 TEST_REQUIRES_ARM_NEON; 7303 for (uint32_t n = 16; n <= 24; n += 8) { 7304 for (size_t k = 1; k <= 80; k += 17) { 7305 GemmMicrokernelTester() 7306 .mr(1) 7307 .nr(8) 7308 .kr(2) 7309 .sr(4) 7310 .m(1) 7311 .n(n) 7312 .k(k) 7313 .cn_stride(11) 7314 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7315 } 7316 } 7317 } 7318 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8_subtile)7319 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8_subtile) { 7320 TEST_REQUIRES_ARM_NEON; 7321 for (uint32_t n = 16; n <= 24; n += 8) { 7322 for (size_t k = 1; k <= 80; k += 17) { 7323 for (uint32_t m = 1; m <= 1; m++) { 7324 GemmMicrokernelTester() 7325 .mr(1) 7326 .nr(8) 7327 .kr(2) 7328 .sr(4) 7329 .m(m) 7330 .n(n) 7331 .k(k) 7332 .iterations(1) 7333 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7334 } 7335 } 7336 } 7337 } 7338 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,small_kernel)7339 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, small_kernel) { 7340 TEST_REQUIRES_ARM_NEON; 7341 for (size_t k = 1; k <= 80; k += 17) { 7342 GemmMicrokernelTester() 7343 .mr(1) 7344 .nr(8) 7345 .kr(2) 7346 .sr(4) 7347 .m(1) 7348 .n(8) 7349 .k(k) 7350 .ks(3) 7351 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7352 } 7353 } 7354 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,small_kernel_subtile)7355 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, small_kernel_subtile) { 7356 TEST_REQUIRES_ARM_NEON; 7357 for (size_t k = 1; k <= 80; k += 
17) { 7358 for (uint32_t n = 1; n <= 8; n++) { 7359 for (uint32_t m = 1; m <= 1; m++) { 7360 GemmMicrokernelTester() 7361 .mr(1) 7362 .nr(8) 7363 .kr(2) 7364 .sr(4) 7365 .m(m) 7366 .n(n) 7367 .k(k) 7368 .ks(3) 7369 .iterations(1) 7370 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7371 } 7372 } 7373 } 7374 } 7375 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8_small_kernel)7376 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8_small_kernel) { 7377 TEST_REQUIRES_ARM_NEON; 7378 for (uint32_t n = 9; n < 16; n++) { 7379 for (size_t k = 1; k <= 80; k += 17) { 7380 GemmMicrokernelTester() 7381 .mr(1) 7382 .nr(8) 7383 .kr(2) 7384 .sr(4) 7385 .m(1) 7386 .n(n) 7387 .k(k) 7388 .ks(3) 7389 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7390 } 7391 } 7392 } 7393 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8_small_kernel)7394 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8_small_kernel) { 7395 TEST_REQUIRES_ARM_NEON; 7396 for (uint32_t n = 16; n <= 24; n += 8) { 7397 for (size_t k = 1; k <= 80; k += 17) { 7398 GemmMicrokernelTester() 7399 .mr(1) 7400 .nr(8) 7401 .kr(2) 7402 .sr(4) 7403 .m(1) 7404 .n(n) 7405 .k(k) 7406 .ks(3) 7407 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7408 } 7409 } 7410 } 7411 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,strided_cm_subtile)7412 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, strided_cm_subtile) { 7413 TEST_REQUIRES_ARM_NEON; 7414 for (size_t k = 1; k <= 80; k += 17) { 7415 for (uint32_t n = 1; n <= 8; n++) { 7416 for (uint32_t m = 1; m <= 1; m++) { 7417 GemmMicrokernelTester() 7418 .mr(1) 7419 .nr(8) 7420 .kr(2) 7421 .sr(4) 7422 .m(m) 7423 .n(n) 7424 .k(k) 7425 .cm_stride(11) 7426 .iterations(1) 7427 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7428 } 7429 } 7430 } 7431 } 7432 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,a_offset)7433 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, a_offset) { 7434 TEST_REQUIRES_ARM_NEON; 7435 for (size_t k = 1; k <= 80; k += 17) { 7436 GemmMicrokernelTester() 7437 .mr(1) 7438 .nr(8) 7439 .kr(2) 7440 .sr(4) 7441 .m(1) 7442 .n(8) 7443 .k(k) 7444 .ks(3) 7445 .a_offset(83) 7446 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7447 } 7448 } 7449 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,zero)7450 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, zero) { 7451 TEST_REQUIRES_ARM_NEON; 7452 for (size_t k = 1; k <= 80; k += 17) { 7453 for (uint32_t mz = 0; mz < 1; mz++) { 7454 GemmMicrokernelTester() 7455 .mr(1) 7456 .nr(8) 7457 .kr(2) 7458 .sr(4) 7459 .m(1) 7460 .n(8) 7461 .k(k) 7462 .ks(3) 7463 .a_offset(83) 7464 .zero_index(mz) 7465 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7466 } 7467 } 7468 } 7469 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,qmin)7470 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, qmin) { 7471 TEST_REQUIRES_ARM_NEON; 7472 GemmMicrokernelTester() 7473 .mr(1) 7474 .nr(8) 7475 .kr(2) 7476 .sr(4) 7477 .m(1) 7478 .n(8) 7479 .k(16) 7480 .qmin(128) 7481 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7482 } 7483 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,qmax)7484 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, qmax) { 7485 TEST_REQUIRES_ARM_NEON; 7486 GemmMicrokernelTester() 7487 .mr(1) 7488 .nr(8) 7489 .kr(2) 7490 .sr(4) 7491 .m(1) 7492 .n(8) 7493 .k(16) 7494 .qmax(128) 7495 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7496 } 7497 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,strided_cm)7498 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, strided_cm) { 7499 TEST_REQUIRES_ARM_NEON; 7500 GemmMicrokernelTester() 7501 .mr(1) 7502 .nr(8) 7503 .kr(2) 7504 .sr(4) 7505 .m(1) 7506 .n(8) 7507 .k(16) 7508 .cm_stride(11) 7509 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7510 } 7511 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7512 7513 7514 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,k_eq_16)7515 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, k_eq_16) { 7516 TEST_REQUIRES_ARM_NEON; 7517 GemmMicrokernelTester() 7518 .mr(4) 7519 .nr(16) 7520 .kr(2) 7521 .sr(4) 7522 .m(4) 7523 .n(16) 7524 .k(16) 7525 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7526 } 7527 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,strided_cn)7528 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, strided_cn) { 7529 TEST_REQUIRES_ARM_NEON; 7530 GemmMicrokernelTester() 7531 .mr(4) 7532 .nr(16) 7533 .kr(2) 7534 .sr(4) 7535 .m(4) 7536 .n(16) 7537 .k(16) 7538 .cn_stride(19) 7539 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7540 } 7541 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,k_eq_16_subtile)7542 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, k_eq_16_subtile) { 7543 TEST_REQUIRES_ARM_NEON; 7544 for (uint32_t n = 1; n <= 16; n++) { 7545 for (uint32_t m = 1; m <= 4; m++) { 7546 GemmMicrokernelTester() 7547 .mr(4) 7548 .nr(16) 7549 .kr(2) 7550 .sr(4) 7551 .m(m) 7552 .n(n) 7553 .k(16) 7554 .iterations(1) 7555 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7556 } 7557 } 7558 } 
7559 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,k_eq_16_subtile_m)7560 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, k_eq_16_subtile_m) { 7561 TEST_REQUIRES_ARM_NEON; 7562 for (uint32_t m = 1; m <= 4; m++) { 7563 GemmMicrokernelTester() 7564 .mr(4) 7565 .nr(16) 7566 .kr(2) 7567 .sr(4) 7568 .m(m) 7569 .n(16) 7570 .k(16) 7571 .iterations(1) 7572 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7573 } 7574 } 7575 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,k_eq_16_subtile_n)7576 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, k_eq_16_subtile_n) { 7577 TEST_REQUIRES_ARM_NEON; 7578 for (uint32_t n = 1; n <= 16; n++) { 7579 GemmMicrokernelTester() 7580 .mr(4) 7581 .nr(16) 7582 .kr(2) 7583 .sr(4) 7584 .m(4) 7585 .n(n) 7586 .k(16) 7587 .iterations(1) 7588 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7589 } 7590 } 7591 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,k_lt_16)7592 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, k_lt_16) { 7593 TEST_REQUIRES_ARM_NEON; 7594 for (size_t k = 1; k < 16; k++) { 7595 GemmMicrokernelTester() 7596 .mr(4) 7597 .nr(16) 7598 .kr(2) 7599 .sr(4) 7600 .m(4) 7601 .n(16) 7602 .k(k) 7603 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7604 } 7605 } 7606 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,k_lt_16_subtile)7607 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, k_lt_16_subtile) { 7608 TEST_REQUIRES_ARM_NEON; 7609 for (size_t k = 1; k < 16; k++) { 7610 for (uint32_t n = 1; n <= 16; n++) { 7611 for (uint32_t m = 1; m <= 4; m++) { 7612 GemmMicrokernelTester() 7613 .mr(4) 7614 .nr(16) 7615 .kr(2) 7616 .sr(4) 7617 .m(m) 7618 .n(n) 7619 .k(k) 7620 .iterations(1) 7621 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 7622 } 7623 } 7624 } 7625 } 7626 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,k_gt_16)7627 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, k_gt_16) { 7628 TEST_REQUIRES_ARM_NEON; 7629 for (size_t k = 17; k < 32; k++) { 7630 GemmMicrokernelTester() 7631 .mr(4) 7632 .nr(16) 7633 .kr(2) 7634 .sr(4) 7635 .m(4) 7636 .n(16) 7637 .k(k) 7638 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7639 } 7640 } 7641 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,k_gt_16_subtile)7642 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, k_gt_16_subtile) { 7643 TEST_REQUIRES_ARM_NEON; 7644 for (size_t k = 17; k < 32; k++) { 7645 for (uint32_t n = 1; n <= 16; n++) { 7646 for (uint32_t m = 1; m <= 4; m++) { 7647 GemmMicrokernelTester() 7648 .mr(4) 7649 .nr(16) 7650 .kr(2) 7651 .sr(4) 7652 .m(m) 7653 .n(n) 7654 .k(k) 7655 .iterations(1) 7656 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7657 } 7658 } 7659 } 7660 } 7661 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,k_div_16)7662 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, k_div_16) { 7663 TEST_REQUIRES_ARM_NEON; 7664 for (size_t k = 32; k <= 160; k += 16) { 7665 GemmMicrokernelTester() 7666 .mr(4) 7667 .nr(16) 7668 .kr(2) 7669 .sr(4) 7670 .m(4) 7671 .n(16) 7672 .k(k) 7673 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7674 } 7675 } 7676 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,k_div_16_subtile)7677 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, k_div_16_subtile) { 7678 TEST_REQUIRES_ARM_NEON; 7679 for (size_t k = 32; k <= 160; k += 16) { 7680 for (uint32_t n = 1; n <= 16; n++) { 7681 for (uint32_t m = 1; m <= 4; m++) { 7682 GemmMicrokernelTester() 7683 .mr(4) 7684 .nr(16) 7685 .kr(2) 7686 .sr(4) 7687 .m(m) 7688 .n(n) 7689 .k(k) 7690 
.iterations(1) 7691 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7692 } 7693 } 7694 } 7695 } 7696 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,n_gt_16)7697 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, n_gt_16) { 7698 TEST_REQUIRES_ARM_NEON; 7699 for (uint32_t n = 17; n < 32; n++) { 7700 for (size_t k = 1; k <= 80; k += 17) { 7701 GemmMicrokernelTester() 7702 .mr(4) 7703 .nr(16) 7704 .kr(2) 7705 .sr(4) 7706 .m(4) 7707 .n(n) 7708 .k(k) 7709 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7710 } 7711 } 7712 } 7713 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,n_gt_16_strided_cn)7714 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, n_gt_16_strided_cn) { 7715 TEST_REQUIRES_ARM_NEON; 7716 for (uint32_t n = 17; n < 32; n++) { 7717 for (size_t k = 1; k <= 80; k += 17) { 7718 GemmMicrokernelTester() 7719 .mr(4) 7720 .nr(16) 7721 .kr(2) 7722 .sr(4) 7723 .m(4) 7724 .n(n) 7725 .k(k) 7726 .cn_stride(19) 7727 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7728 } 7729 } 7730 } 7731 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,n_gt_16_subtile)7732 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, n_gt_16_subtile) { 7733 TEST_REQUIRES_ARM_NEON; 7734 for (uint32_t n = 17; n < 32; n++) { 7735 for (size_t k = 1; k <= 80; k += 17) { 7736 for (uint32_t m = 1; m <= 4; m++) { 7737 GemmMicrokernelTester() 7738 .mr(4) 7739 .nr(16) 7740 .kr(2) 7741 .sr(4) 7742 .m(m) 7743 .n(n) 7744 .k(k) 7745 .iterations(1) 7746 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7747 } 7748 } 7749 } 7750 } 7751 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,n_div_16)7752 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, n_div_16) { 7753 TEST_REQUIRES_ARM_NEON; 
7754 for (uint32_t n = 32; n <= 48; n += 16) { 7755 for (size_t k = 1; k <= 80; k += 17) { 7756 GemmMicrokernelTester() 7757 .mr(4) 7758 .nr(16) 7759 .kr(2) 7760 .sr(4) 7761 .m(4) 7762 .n(n) 7763 .k(k) 7764 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7765 } 7766 } 7767 } 7768 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,n_div_16_strided_cn)7769 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, n_div_16_strided_cn) { 7770 TEST_REQUIRES_ARM_NEON; 7771 for (uint32_t n = 32; n <= 48; n += 16) { 7772 for (size_t k = 1; k <= 80; k += 17) { 7773 GemmMicrokernelTester() 7774 .mr(4) 7775 .nr(16) 7776 .kr(2) 7777 .sr(4) 7778 .m(4) 7779 .n(n) 7780 .k(k) 7781 .cn_stride(19) 7782 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7783 } 7784 } 7785 } 7786 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,n_div_16_subtile)7787 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, n_div_16_subtile) { 7788 TEST_REQUIRES_ARM_NEON; 7789 for (uint32_t n = 32; n <= 48; n += 16) { 7790 for (size_t k = 1; k <= 80; k += 17) { 7791 for (uint32_t m = 1; m <= 4; m++) { 7792 GemmMicrokernelTester() 7793 .mr(4) 7794 .nr(16) 7795 .kr(2) 7796 .sr(4) 7797 .m(m) 7798 .n(n) 7799 .k(k) 7800 .iterations(1) 7801 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7802 } 7803 } 7804 } 7805 } 7806 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,small_kernel)7807 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, small_kernel) { 7808 TEST_REQUIRES_ARM_NEON; 7809 for (size_t k = 1; k <= 80; k += 17) { 7810 GemmMicrokernelTester() 7811 .mr(4) 7812 .nr(16) 7813 .kr(2) 7814 .sr(4) 7815 .m(4) 7816 .n(16) 7817 .k(k) 7818 .ks(3) 7819 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7820 
} 7821 } 7822 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,small_kernel_subtile)7823 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, small_kernel_subtile) { 7824 TEST_REQUIRES_ARM_NEON; 7825 for (size_t k = 1; k <= 80; k += 17) { 7826 for (uint32_t n = 1; n <= 16; n++) { 7827 for (uint32_t m = 1; m <= 4; m++) { 7828 GemmMicrokernelTester() 7829 .mr(4) 7830 .nr(16) 7831 .kr(2) 7832 .sr(4) 7833 .m(m) 7834 .n(n) 7835 .k(k) 7836 .ks(3) 7837 .iterations(1) 7838 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7839 } 7840 } 7841 } 7842 } 7843 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,n_gt_16_small_kernel)7844 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, n_gt_16_small_kernel) { 7845 TEST_REQUIRES_ARM_NEON; 7846 for (uint32_t n = 17; n < 32; n++) { 7847 for (size_t k = 1; k <= 80; k += 17) { 7848 GemmMicrokernelTester() 7849 .mr(4) 7850 .nr(16) 7851 .kr(2) 7852 .sr(4) 7853 .m(4) 7854 .n(n) 7855 .k(k) 7856 .ks(3) 7857 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7858 } 7859 } 7860 } 7861 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,n_div_16_small_kernel)7862 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, n_div_16_small_kernel) { 7863 TEST_REQUIRES_ARM_NEON; 7864 for (uint32_t n = 32; n <= 48; n += 16) { 7865 for (size_t k = 1; k <= 80; k += 17) { 7866 GemmMicrokernelTester() 7867 .mr(4) 7868 .nr(16) 7869 .kr(2) 7870 .sr(4) 7871 .m(4) 7872 .n(n) 7873 .k(k) 7874 .ks(3) 7875 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7876 } 7877 } 7878 } 7879 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,strided_cm_subtile)7880 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, strided_cm_subtile) { 7881 TEST_REQUIRES_ARM_NEON; 7882 for (size_t k = 1; k <= 80; k += 17) { 7883 for (uint32_t n = 1; n <= 16; n++) { 7884 
for (uint32_t m = 1; m <= 4; m++) { 7885 GemmMicrokernelTester() 7886 .mr(4) 7887 .nr(16) 7888 .kr(2) 7889 .sr(4) 7890 .m(m) 7891 .n(n) 7892 .k(k) 7893 .cm_stride(19) 7894 .iterations(1) 7895 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7896 } 7897 } 7898 } 7899 } 7900 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,a_offset)7901 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, a_offset) { 7902 TEST_REQUIRES_ARM_NEON; 7903 for (size_t k = 1; k <= 80; k += 17) { 7904 GemmMicrokernelTester() 7905 .mr(4) 7906 .nr(16) 7907 .kr(2) 7908 .sr(4) 7909 .m(4) 7910 .n(16) 7911 .k(k) 7912 .ks(3) 7913 .a_offset(331) 7914 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7915 } 7916 } 7917 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,zero)7918 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, zero) { 7919 TEST_REQUIRES_ARM_NEON; 7920 for (size_t k = 1; k <= 80; k += 17) { 7921 for (uint32_t mz = 0; mz < 4; mz++) { 7922 GemmMicrokernelTester() 7923 .mr(4) 7924 .nr(16) 7925 .kr(2) 7926 .sr(4) 7927 .m(4) 7928 .n(16) 7929 .k(k) 7930 .ks(3) 7931 .a_offset(331) 7932 .zero_index(mz) 7933 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7934 } 7935 } 7936 } 7937 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,qmin)7938 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, qmin) { 7939 TEST_REQUIRES_ARM_NEON; 7940 GemmMicrokernelTester() 7941 .mr(4) 7942 .nr(16) 7943 .kr(2) 7944 .sr(4) 7945 .m(4) 7946 .n(16) 7947 .k(16) 7948 .qmin(128) 7949 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7950 } 7951 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,qmax)7952 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, qmax) { 7953 TEST_REQUIRES_ARM_NEON; 7954 
GemmMicrokernelTester() 7955 .mr(4) 7956 .nr(16) 7957 .kr(2) 7958 .sr(4) 7959 .m(4) 7960 .n(16) 7961 .k(16) 7962 .qmax(128) 7963 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7964 } 7965 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL,strided_cm)7966 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2S4__NEON_MLAL, strided_cm) { 7967 TEST_REQUIRES_ARM_NEON; 7968 GemmMicrokernelTester() 7969 .mr(4) 7970 .nr(16) 7971 .kr(2) 7972 .sr(4) 7973 .m(4) 7974 .n(16) 7975 .k(16) 7976 .cm_stride(19) 7977 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7978 } 7979 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7980 7981 7982 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,k_eq_8)7983 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, k_eq_8) { 7984 TEST_REQUIRES_ARM_NEON; 7985 GemmMicrokernelTester() 7986 .mr(4) 7987 .nr(8) 7988 .kr(4) 7989 .sr(1) 7990 .m(4) 7991 .n(8) 7992 .k(8) 7993 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7994 } 7995 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,strided_cn)7996 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, strided_cn) { 7997 TEST_REQUIRES_ARM_NEON; 7998 GemmMicrokernelTester() 7999 .mr(4) 8000 .nr(8) 8001 .kr(4) 8002 .sr(1) 8003 .m(4) 8004 .n(8) 8005 .k(8) 8006 .cn_stride(11) 8007 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8008 } 8009 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,k_eq_8_subtile)8010 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, k_eq_8_subtile) { 8011 TEST_REQUIRES_ARM_NEON; 8012 for (uint32_t n = 1; n <= 8; n++) { 8013 for (uint32_t m = 1; m <= 4; m++) { 8014 GemmMicrokernelTester() 8015 .mr(4) 8016 .nr(8) 8017 .kr(4) 8018 .sr(1) 8019 .m(m) 8020 
.n(n) 8021 .k(8) 8022 .iterations(1) 8023 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8024 } 8025 } 8026 } 8027 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,k_eq_8_subtile_m)8028 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) { 8029 TEST_REQUIRES_ARM_NEON; 8030 for (uint32_t m = 1; m <= 4; m++) { 8031 GemmMicrokernelTester() 8032 .mr(4) 8033 .nr(8) 8034 .kr(4) 8035 .sr(1) 8036 .m(m) 8037 .n(8) 8038 .k(8) 8039 .iterations(1) 8040 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8041 } 8042 } 8043 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,k_eq_8_subtile_n)8044 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) { 8045 TEST_REQUIRES_ARM_NEON; 8046 for (uint32_t n = 1; n <= 8; n++) { 8047 GemmMicrokernelTester() 8048 .mr(4) 8049 .nr(8) 8050 .kr(4) 8051 .sr(1) 8052 .m(4) 8053 .n(n) 8054 .k(8) 8055 .iterations(1) 8056 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8057 } 8058 } 8059 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,k_lt_8)8060 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, k_lt_8) { 8061 TEST_REQUIRES_ARM_NEON; 8062 for (size_t k = 1; k < 8; k++) { 8063 GemmMicrokernelTester() 8064 .mr(4) 8065 .nr(8) 8066 .kr(4) 8067 .sr(1) 8068 .m(4) 8069 .n(8) 8070 .k(k) 8071 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8072 } 8073 } 8074 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,k_lt_8_subtile)8075 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, k_lt_8_subtile) { 8076 TEST_REQUIRES_ARM_NEON; 8077 for (size_t k = 1; k < 8; k++) { 8078 for (uint32_t n = 1; n <= 8; n++) { 8079 for (uint32_t m = 1; m <= 4; m++) { 8080 GemmMicrokernelTester() 8081 .mr(4) 8082 .nr(8) 8083 
.kr(4) 8084 .sr(1) 8085 .m(m) 8086 .n(n) 8087 .k(k) 8088 .iterations(1) 8089 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8090 } 8091 } 8092 } 8093 } 8094 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,k_gt_8)8095 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, k_gt_8) { 8096 TEST_REQUIRES_ARM_NEON; 8097 for (size_t k = 9; k < 16; k++) { 8098 GemmMicrokernelTester() 8099 .mr(4) 8100 .nr(8) 8101 .kr(4) 8102 .sr(1) 8103 .m(4) 8104 .n(8) 8105 .k(k) 8106 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8107 } 8108 } 8109 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,k_gt_8_subtile)8110 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, k_gt_8_subtile) { 8111 TEST_REQUIRES_ARM_NEON; 8112 for (size_t k = 9; k < 16; k++) { 8113 for (uint32_t n = 1; n <= 8; n++) { 8114 for (uint32_t m = 1; m <= 4; m++) { 8115 GemmMicrokernelTester() 8116 .mr(4) 8117 .nr(8) 8118 .kr(4) 8119 .sr(1) 8120 .m(m) 8121 .n(n) 8122 .k(k) 8123 .iterations(1) 8124 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8125 } 8126 } 8127 } 8128 } 8129 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,k_div_8)8130 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, k_div_8) { 8131 TEST_REQUIRES_ARM_NEON; 8132 for (size_t k = 16; k <= 80; k += 8) { 8133 GemmMicrokernelTester() 8134 .mr(4) 8135 .nr(8) 8136 .kr(4) 8137 .sr(1) 8138 .m(4) 8139 .n(8) 8140 .k(k) 8141 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8142 } 8143 } 8144 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,k_div_8_subtile)8145 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, k_div_8_subtile) { 8146 TEST_REQUIRES_ARM_NEON; 8147 for (size_t k = 16; k <= 80; k += 8) { 8148 for (uint32_t n = 1; n <= 8; 
n++) { 8149 for (uint32_t m = 1; m <= 4; m++) { 8150 GemmMicrokernelTester() 8151 .mr(4) 8152 .nr(8) 8153 .kr(4) 8154 .sr(1) 8155 .m(m) 8156 .n(n) 8157 .k(k) 8158 .iterations(1) 8159 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8160 } 8161 } 8162 } 8163 } 8164 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,n_gt_8)8165 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, n_gt_8) { 8166 TEST_REQUIRES_ARM_NEON; 8167 for (uint32_t n = 9; n < 16; n++) { 8168 for (size_t k = 1; k <= 40; k += 9) { 8169 GemmMicrokernelTester() 8170 .mr(4) 8171 .nr(8) 8172 .kr(4) 8173 .sr(1) 8174 .m(4) 8175 .n(n) 8176 .k(k) 8177 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8178 } 8179 } 8180 } 8181 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,n_gt_8_strided_cn)8182 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) { 8183 TEST_REQUIRES_ARM_NEON; 8184 for (uint32_t n = 9; n < 16; n++) { 8185 for (size_t k = 1; k <= 40; k += 9) { 8186 GemmMicrokernelTester() 8187 .mr(4) 8188 .nr(8) 8189 .kr(4) 8190 .sr(1) 8191 .m(4) 8192 .n(n) 8193 .k(k) 8194 .cn_stride(11) 8195 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8196 } 8197 } 8198 } 8199 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,n_gt_8_subtile)8200 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, n_gt_8_subtile) { 8201 TEST_REQUIRES_ARM_NEON; 8202 for (uint32_t n = 9; n < 16; n++) { 8203 for (size_t k = 1; k <= 40; k += 9) { 8204 for (uint32_t m = 1; m <= 4; m++) { 8205 GemmMicrokernelTester() 8206 .mr(4) 8207 .nr(8) 8208 .kr(4) 8209 .sr(1) 8210 .m(m) 8211 .n(n) 8212 .k(k) 8213 .iterations(1) 8214 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8215 } 8216 } 8217 } 8218 } 
8219 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,n_div_8)8220 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, n_div_8) { 8221 TEST_REQUIRES_ARM_NEON; 8222 for (uint32_t n = 16; n <= 24; n += 8) { 8223 for (size_t k = 1; k <= 40; k += 9) { 8224 GemmMicrokernelTester() 8225 .mr(4) 8226 .nr(8) 8227 .kr(4) 8228 .sr(1) 8229 .m(4) 8230 .n(n) 8231 .k(k) 8232 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8233 } 8234 } 8235 } 8236 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,n_div_8_strided_cn)8237 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, n_div_8_strided_cn) { 8238 TEST_REQUIRES_ARM_NEON; 8239 for (uint32_t n = 16; n <= 24; n += 8) { 8240 for (size_t k = 1; k <= 40; k += 9) { 8241 GemmMicrokernelTester() 8242 .mr(4) 8243 .nr(8) 8244 .kr(4) 8245 .sr(1) 8246 .m(4) 8247 .n(n) 8248 .k(k) 8249 .cn_stride(11) 8250 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8251 } 8252 } 8253 } 8254 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,n_div_8_subtile)8255 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, n_div_8_subtile) { 8256 TEST_REQUIRES_ARM_NEON; 8257 for (uint32_t n = 16; n <= 24; n += 8) { 8258 for (size_t k = 1; k <= 40; k += 9) { 8259 for (uint32_t m = 1; m <= 4; m++) { 8260 GemmMicrokernelTester() 8261 .mr(4) 8262 .nr(8) 8263 .kr(4) 8264 .sr(1) 8265 .m(m) 8266 .n(n) 8267 .k(k) 8268 .iterations(1) 8269 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8270 } 8271 } 8272 } 8273 } 8274 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,small_kernel)8275 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, small_kernel) { 8276 TEST_REQUIRES_ARM_NEON; 8277 for (size_t k = 1; k <= 40; k += 9) { 8278 GemmMicrokernelTester() 8279 .mr(4) 8280 .nr(8) 8281 .kr(4) 8282 .sr(1) 8283 .m(4) 8284 .n(8) 8285 .k(k) 8286 
.ks(3) 8287 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8288 } 8289 } 8290 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,small_kernel_subtile)8291 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, small_kernel_subtile) { 8292 TEST_REQUIRES_ARM_NEON; 8293 for (size_t k = 1; k <= 40; k += 9) { 8294 for (uint32_t n = 1; n <= 8; n++) { 8295 for (uint32_t m = 1; m <= 4; m++) { 8296 GemmMicrokernelTester() 8297 .mr(4) 8298 .nr(8) 8299 .kr(4) 8300 .sr(1) 8301 .m(m) 8302 .n(n) 8303 .k(k) 8304 .ks(3) 8305 .iterations(1) 8306 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8307 } 8308 } 8309 } 8310 } 8311 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,n_gt_8_small_kernel)8312 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, n_gt_8_small_kernel) { 8313 TEST_REQUIRES_ARM_NEON; 8314 for (uint32_t n = 9; n < 16; n++) { 8315 for (size_t k = 1; k <= 40; k += 9) { 8316 GemmMicrokernelTester() 8317 .mr(4) 8318 .nr(8) 8319 .kr(4) 8320 .sr(1) 8321 .m(4) 8322 .n(n) 8323 .k(k) 8324 .ks(3) 8325 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8326 } 8327 } 8328 } 8329 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,n_div_8_small_kernel)8330 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, n_div_8_small_kernel) { 8331 TEST_REQUIRES_ARM_NEON; 8332 for (uint32_t n = 16; n <= 24; n += 8) { 8333 for (size_t k = 1; k <= 40; k += 9) { 8334 GemmMicrokernelTester() 8335 .mr(4) 8336 .nr(8) 8337 .kr(4) 8338 .sr(1) 8339 .m(4) 8340 .n(n) 8341 .k(k) 8342 .ks(3) 8343 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8344 } 8345 } 8346 } 8347 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,strided_cm_subtile)8348 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, strided_cm_subtile) { 8349 TEST_REQUIRES_ARM_NEON; 8350 for (size_t k = 1; k <= 40; k += 9) { 8351 for (uint32_t n = 1; n <= 8; n++) { 8352 for (uint32_t m = 1; m <= 4; m++) { 8353 GemmMicrokernelTester() 8354 .mr(4) 8355 .nr(8) 8356 .kr(4) 8357 .sr(1) 8358 .m(m) 8359 .n(n) 8360 .k(k) 8361 .cm_stride(11) 8362 .iterations(1) 8363 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8364 } 8365 } 8366 } 8367 } 8368 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,a_offset)8369 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, a_offset) { 8370 TEST_REQUIRES_ARM_NEON; 8371 for (size_t k = 1; k <= 40; k += 9) { 8372 GemmMicrokernelTester() 8373 .mr(4) 8374 .nr(8) 8375 .kr(4) 8376 .sr(1) 8377 .m(4) 8378 .n(8) 8379 .k(k) 8380 .ks(3) 8381 .a_offset(163) 8382 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8383 } 8384 } 8385 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,zero)8386 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, zero) { 8387 TEST_REQUIRES_ARM_NEON; 8388 for (size_t k = 1; k <= 40; k += 9) { 8389 for (uint32_t mz = 0; mz < 4; mz++) { 8390 GemmMicrokernelTester() 8391 .mr(4) 8392 .nr(8) 8393 .kr(4) 8394 .sr(1) 8395 .m(4) 8396 .n(8) 8397 .k(k) 8398 .ks(3) 8399 .a_offset(163) 8400 .zero_index(mz) 8401 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8402 } 8403 } 8404 } 8405 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,qmin)8406 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, qmin) { 8407 TEST_REQUIRES_ARM_NEON; 8408 GemmMicrokernelTester() 8409 .mr(4) 8410 .nr(8) 8411 .kr(4) 8412 .sr(1) 8413 .m(4) 8414 .n(8) 8415 .k(8) 8416 .qmin(128) 8417 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 8418 } 8419 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,qmax)8420 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, qmax) { 8421 TEST_REQUIRES_ARM_NEON; 8422 GemmMicrokernelTester() 8423 .mr(4) 8424 .nr(8) 8425 .kr(4) 8426 .sr(1) 8427 .m(4) 8428 .n(8) 8429 .k(8) 8430 .qmax(128) 8431 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8432 } 8433 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP,strided_cm)8434 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_DUP, strided_cm) { 8435 TEST_REQUIRES_ARM_NEON; 8436 GemmMicrokernelTester() 8437 .mr(4) 8438 .nr(8) 8439 .kr(4) 8440 .sr(1) 8441 .m(4) 8442 .n(8) 8443 .k(8) 8444 .cm_stride(11) 8445 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8446 } 8447 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 8448 8449 8450 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,k_eq_8)8451 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_eq_8) { 8452 TEST_REQUIRES_ARM_NEON; 8453 GemmMicrokernelTester() 8454 .mr(2) 8455 .nr(16) 8456 .kr(4) 8457 .sr(1) 8458 .m(2) 8459 .n(16) 8460 .k(8) 8461 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8462 } 8463 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,strided_cn)8464 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, strided_cn) { 8465 TEST_REQUIRES_ARM_NEON; 8466 GemmMicrokernelTester() 8467 .mr(2) 8468 .nr(16) 8469 .kr(4) 8470 .sr(1) 8471 .m(2) 8472 .n(16) 8473 .k(8) 8474 .cn_stride(19) 8475 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8476 } 8477 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,k_eq_8_subtile)8478 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_eq_8_subtile) { 8479 
TEST_REQUIRES_ARM_NEON; 8480 for (uint32_t n = 1; n <= 16; n++) { 8481 for (uint32_t m = 1; m <= 2; m++) { 8482 GemmMicrokernelTester() 8483 .mr(2) 8484 .nr(16) 8485 .kr(4) 8486 .sr(1) 8487 .m(m) 8488 .n(n) 8489 .k(8) 8490 .iterations(1) 8491 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8492 } 8493 } 8494 } 8495 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,k_eq_8_subtile_m)8496 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) { 8497 TEST_REQUIRES_ARM_NEON; 8498 for (uint32_t m = 1; m <= 2; m++) { 8499 GemmMicrokernelTester() 8500 .mr(2) 8501 .nr(16) 8502 .kr(4) 8503 .sr(1) 8504 .m(m) 8505 .n(16) 8506 .k(8) 8507 .iterations(1) 8508 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8509 } 8510 } 8511 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,k_eq_8_subtile_n)8512 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) { 8513 TEST_REQUIRES_ARM_NEON; 8514 for (uint32_t n = 1; n <= 16; n++) { 8515 GemmMicrokernelTester() 8516 .mr(2) 8517 .nr(16) 8518 .kr(4) 8519 .sr(1) 8520 .m(2) 8521 .n(n) 8522 .k(8) 8523 .iterations(1) 8524 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8525 } 8526 } 8527 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,k_lt_8)8528 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_lt_8) { 8529 TEST_REQUIRES_ARM_NEON; 8530 for (size_t k = 1; k < 8; k++) { 8531 GemmMicrokernelTester() 8532 .mr(2) 8533 .nr(16) 8534 .kr(4) 8535 .sr(1) 8536 .m(2) 8537 .n(16) 8538 .k(k) 8539 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8540 } 8541 } 8542 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,k_lt_8_subtile)8543 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, 
k_lt_8_subtile) { 8544 TEST_REQUIRES_ARM_NEON; 8545 for (size_t k = 1; k < 8; k++) { 8546 for (uint32_t n = 1; n <= 16; n++) { 8547 for (uint32_t m = 1; m <= 2; m++) { 8548 GemmMicrokernelTester() 8549 .mr(2) 8550 .nr(16) 8551 .kr(4) 8552 .sr(1) 8553 .m(m) 8554 .n(n) 8555 .k(k) 8556 .iterations(1) 8557 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8558 } 8559 } 8560 } 8561 } 8562 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,k_gt_8)8563 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_gt_8) { 8564 TEST_REQUIRES_ARM_NEON; 8565 for (size_t k = 9; k < 16; k++) { 8566 GemmMicrokernelTester() 8567 .mr(2) 8568 .nr(16) 8569 .kr(4) 8570 .sr(1) 8571 .m(2) 8572 .n(16) 8573 .k(k) 8574 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8575 } 8576 } 8577 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,k_gt_8_subtile)8578 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_gt_8_subtile) { 8579 TEST_REQUIRES_ARM_NEON; 8580 for (size_t k = 9; k < 16; k++) { 8581 for (uint32_t n = 1; n <= 16; n++) { 8582 for (uint32_t m = 1; m <= 2; m++) { 8583 GemmMicrokernelTester() 8584 .mr(2) 8585 .nr(16) 8586 .kr(4) 8587 .sr(1) 8588 .m(m) 8589 .n(n) 8590 .k(k) 8591 .iterations(1) 8592 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8593 } 8594 } 8595 } 8596 } 8597 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,k_div_8)8598 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_div_8) { 8599 TEST_REQUIRES_ARM_NEON; 8600 for (size_t k = 16; k <= 80; k += 8) { 8601 GemmMicrokernelTester() 8602 .mr(2) 8603 .nr(16) 8604 .kr(4) 8605 .sr(1) 8606 .m(2) 8607 .n(16) 8608 .k(k) 8609 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8610 } 8611 } 8612 
// 2x16c4 NEON MULL_DUP cases, continued (mr=2, nr=16, kr=4, sr=1).

// k in {16, 24, ..., 80} with every m in 1..2 and n in 1..16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in 17..31 (above nr, below 2*nr) with m = 2; k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, additionally setting cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_16, with m swept over 1..2 and iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in {32, 48} with m = 2; k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, additionally setting cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_div_16, with m swept over 1..2 and iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// 2x16c4 NEON MULL_DUP cases, continued (mr=2, nr=16, kr=4, sr=1).

// m = 2, n = 16 with ks(3); k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks(3) with every m in 1..2 and n in 1..16; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n in 17..31 (above nr, below 2*nr) and m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks(3) with n in {32, 48} and m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride(19) with every m in 1..2 and n in 1..16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x16 tile with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same setup as a_offset, with zero_index(mz) swept over mz = 0..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, qmin) { 8875 TEST_REQUIRES_ARM_NEON; 8876 GemmMicrokernelTester() 8877 .mr(2) 8878 .nr(16) 8879 .kr(4) 8880 .sr(1) 8881 .m(2) 8882 .n(16) 8883 .k(8) 8884 .qmin(128) 8885 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8886 } 8887 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,qmax)8888 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, qmax) { 8889 TEST_REQUIRES_ARM_NEON; 8890 GemmMicrokernelTester() 8891 .mr(2) 8892 .nr(16) 8893 .kr(4) 8894 .sr(1) 8895 .m(2) 8896 .n(16) 8897 .k(8) 8898 .qmax(128) 8899 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8900 } 8901 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP,strided_cm)8902 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_DUP, strided_cm) { 8903 TEST_REQUIRES_ARM_NEON; 8904 GemmMicrokernelTester() 8905 .mr(2) 8906 .nr(16) 8907 .kr(4) 8908 .sr(1) 8909 .m(2) 8910 .n(16) 8911 .k(8) 8912 .cm_stride(19) 8913 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8914 } 8915 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 8916 8917 8918 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,k_eq_16)8919 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, k_eq_16) { 8920 TEST_REQUIRES_ARM_NEON; 8921 GemmMicrokernelTester() 8922 .mr(1) 8923 .nr(8) 8924 .kr(4) 8925 .sr(1) 8926 .m(1) 8927 .n(8) 8928 .k(16) 8929 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8930 } 8931 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,strided_cn)8932 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, strided_cn) { 8933 TEST_REQUIRES_ARM_NEON; 8934 GemmMicrokernelTester() 8935 .mr(1) 8936 .nr(8) 8937 .kr(4) 8938 .sr(1) 8939 .m(1) 8940 
.n(8) 8941 .k(16) 8942 .cn_stride(11) 8943 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8944 } 8945 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,k_eq_16_subtile)8946 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile) { 8947 TEST_REQUIRES_ARM_NEON; 8948 for (uint32_t n = 1; n <= 8; n++) { 8949 for (uint32_t m = 1; m <= 1; m++) { 8950 GemmMicrokernelTester() 8951 .mr(1) 8952 .nr(8) 8953 .kr(4) 8954 .sr(1) 8955 .m(m) 8956 .n(n) 8957 .k(16) 8958 .iterations(1) 8959 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8960 } 8961 } 8962 } 8963 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,k_eq_16_subtile_m)8964 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) { 8965 TEST_REQUIRES_ARM_NEON; 8966 for (uint32_t m = 1; m <= 1; m++) { 8967 GemmMicrokernelTester() 8968 .mr(1) 8969 .nr(8) 8970 .kr(4) 8971 .sr(1) 8972 .m(m) 8973 .n(8) 8974 .k(16) 8975 .iterations(1) 8976 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8977 } 8978 } 8979 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,k_eq_16_subtile_n)8980 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) { 8981 TEST_REQUIRES_ARM_NEON; 8982 for (uint32_t n = 1; n <= 8; n++) { 8983 GemmMicrokernelTester() 8984 .mr(1) 8985 .nr(8) 8986 .kr(4) 8987 .sr(1) 8988 .m(1) 8989 .n(n) 8990 .k(16) 8991 .iterations(1) 8992 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8993 } 8994 } 8995 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,k_lt_16)8996 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, k_lt_16) { 8997 TEST_REQUIRES_ARM_NEON; 8998 for (size_t k = 1; k < 16; k++) { 8999 GemmMicrokernelTester() 9000 .mr(1) 9001 .nr(8) 9002 .kr(4) 9003 
.sr(1) 9004 .m(1) 9005 .n(8) 9006 .k(k) 9007 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9008 } 9009 } 9010 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,k_lt_16_subtile)9011 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, k_lt_16_subtile) { 9012 TEST_REQUIRES_ARM_NEON; 9013 for (size_t k = 1; k < 16; k++) { 9014 for (uint32_t n = 1; n <= 8; n++) { 9015 for (uint32_t m = 1; m <= 1; m++) { 9016 GemmMicrokernelTester() 9017 .mr(1) 9018 .nr(8) 9019 .kr(4) 9020 .sr(1) 9021 .m(m) 9022 .n(n) 9023 .k(k) 9024 .iterations(1) 9025 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9026 } 9027 } 9028 } 9029 } 9030 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,k_gt_16)9031 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, k_gt_16) { 9032 TEST_REQUIRES_ARM_NEON; 9033 for (size_t k = 17; k < 32; k++) { 9034 GemmMicrokernelTester() 9035 .mr(1) 9036 .nr(8) 9037 .kr(4) 9038 .sr(1) 9039 .m(1) 9040 .n(8) 9041 .k(k) 9042 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9043 } 9044 } 9045 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,k_gt_16_subtile)9046 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, k_gt_16_subtile) { 9047 TEST_REQUIRES_ARM_NEON; 9048 for (size_t k = 17; k < 32; k++) { 9049 for (uint32_t n = 1; n <= 8; n++) { 9050 for (uint32_t m = 1; m <= 1; m++) { 9051 GemmMicrokernelTester() 9052 .mr(1) 9053 .nr(8) 9054 .kr(4) 9055 .sr(1) 9056 .m(m) 9057 .n(n) 9058 .k(k) 9059 .iterations(1) 9060 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9061 } 9062 } 9063 } 9064 } 9065 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,k_div_16)9066 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, k_div_16) { 9067 TEST_REQUIRES_ARM_NEON; 9068 
for (size_t k = 32; k <= 160; k += 16) { 9069 GemmMicrokernelTester() 9070 .mr(1) 9071 .nr(8) 9072 .kr(4) 9073 .sr(1) 9074 .m(1) 9075 .n(8) 9076 .k(k) 9077 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9078 } 9079 } 9080 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,k_div_16_subtile)9081 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, k_div_16_subtile) { 9082 TEST_REQUIRES_ARM_NEON; 9083 for (size_t k = 32; k <= 160; k += 16) { 9084 for (uint32_t n = 1; n <= 8; n++) { 9085 for (uint32_t m = 1; m <= 1; m++) { 9086 GemmMicrokernelTester() 9087 .mr(1) 9088 .nr(8) 9089 .kr(4) 9090 .sr(1) 9091 .m(m) 9092 .n(n) 9093 .k(k) 9094 .iterations(1) 9095 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9096 } 9097 } 9098 } 9099 } 9100 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,n_gt_8)9101 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, n_gt_8) { 9102 TEST_REQUIRES_ARM_NEON; 9103 for (uint32_t n = 9; n < 16; n++) { 9104 for (size_t k = 1; k <= 80; k += 17) { 9105 GemmMicrokernelTester() 9106 .mr(1) 9107 .nr(8) 9108 .kr(4) 9109 .sr(1) 9110 .m(1) 9111 .n(n) 9112 .k(k) 9113 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9114 } 9115 } 9116 } 9117 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,n_gt_8_strided_cn)9118 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) { 9119 TEST_REQUIRES_ARM_NEON; 9120 for (uint32_t n = 9; n < 16; n++) { 9121 for (size_t k = 1; k <= 80; k += 17) { 9122 GemmMicrokernelTester() 9123 .mr(1) 9124 .nr(8) 9125 .kr(4) 9126 .sr(1) 9127 .m(1) 9128 .n(n) 9129 .k(k) 9130 .cn_stride(11) 9131 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9132 } 9133 } 9134 } 9135 
// n in [9, 16), k = 1..80 step 17, every m, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16, 24; k = 1..80 step 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16, 24; k = 1..80 step 17; cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16, 24; k = 1..80 step 17; every m, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile, k = 1..80 step 17, ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every subtile, k = 1..80 step 17, ks(3), one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [9, 16), k = 1..80 step 17, ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) { 9267 TEST_REQUIRES_ARM_NEON; 9268 for (uint32_t n = 16; n <= 24; n += 8) { 9269 for (size_t k = 1; k <= 80; k += 17) { 9270 GemmMicrokernelTester() 9271 .mr(1) 9272 .nr(8) 9273 .kr(4) 9274 .sr(1) 9275 .m(1) 9276 .n(n) 9277 .k(k) 9278 .ks(3) 9279 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9280 } 9281 } 9282 } 9283 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,strided_cm_subtile)9284 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, strided_cm_subtile) { 9285 TEST_REQUIRES_ARM_NEON; 9286 for (size_t k = 1; k <= 80; k += 17) { 9287 for (uint32_t n = 1; n <= 8; n++) { 9288 for (uint32_t m = 1; m <= 1; m++) { 9289 GemmMicrokernelTester() 9290 .mr(1) 9291 .nr(8) 9292 .kr(4) 9293 .sr(1) 9294 .m(m) 9295 .n(n) 9296 .k(k) 9297 .cm_stride(11) 9298 .iterations(1) 9299 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9300 } 9301 } 9302 } 9303 } 9304 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,a_offset)9305 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, a_offset) { 9306 TEST_REQUIRES_ARM_NEON; 9307 for (size_t k = 1; k <= 80; k += 17) { 9308 GemmMicrokernelTester() 9309 .mr(1) 9310 .nr(8) 9311 .kr(4) 9312 .sr(1) 9313 .m(1) 9314 .n(8) 9315 .k(k) 9316 .ks(3) 9317 .a_offset(83) 9318 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9319 } 9320 } 9321 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,zero)9322 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, zero) { 9323 TEST_REQUIRES_ARM_NEON; 9324 for (size_t k = 1; k <= 80; k += 17) { 9325 for (uint32_t mz = 0; mz < 1; mz++) { 9326 GemmMicrokernelTester() 9327 .mr(1) 9328 .nr(8) 9329 .kr(4) 9330 .sr(1) 9331 .m(1) 9332 .n(8) 9333 .k(k) 9334 .ks(3) 9335 .a_offset(83) 9336 .zero_index(mz) 9337 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9338 } 9339 } 9340 } 9341 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,qmin)9342 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, qmin) { 9343 TEST_REQUIRES_ARM_NEON; 9344 GemmMicrokernelTester() 9345 .mr(1) 9346 .nr(8) 9347 .kr(4) 9348 .sr(1) 9349 .m(1) 9350 .n(8) 9351 .k(16) 9352 .qmin(128) 9353 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9354 } 9355 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,qmax)9356 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, qmax) { 9357 TEST_REQUIRES_ARM_NEON; 9358 GemmMicrokernelTester() 9359 .mr(1) 9360 .nr(8) 9361 .kr(4) 9362 .sr(1) 9363 .m(1) 9364 .n(8) 9365 .k(16) 9366 .qmax(128) 9367 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9368 } 9369 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP,strided_cm)9370 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_DUP, strided_cm) { 9371 TEST_REQUIRES_ARM_NEON; 9372 GemmMicrokernelTester() 9373 .mr(1) 9374 .nr(8) 9375 .kr(4) 9376 .sr(1) 9377 .m(1) 9378 .n(8) 9379 .k(16) 9380 .cm_stride(11) 9381 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9382 } 9383 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 9384 9385 9386 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,k_eq_16)9387 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_eq_16) { 9388 TEST_REQUIRES_ARM_NEON; 9389 GemmMicrokernelTester() 9390 .mr(3) 9391 .nr(8) 9392 .kr(4) 9393 .sr(1) 9394 .m(3) 9395 .n(8) 9396 .k(16) 9397 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9398 } 9399 
// Full 3x8 tile, k = 16, cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Every m x n subtile at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweep m only (n = 8, k = 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(m).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweep n only (m = 3, k = 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(n).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full tile for every k in [1, 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every subtile for every k in [1, 16), one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for every k in [17, 32).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every subtile for every k in [17, 32), one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for k = 32..160 step 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every subtile for k = 32..160 step 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [9, 16), k = 1..80 step 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [9, 16), k = 1..80 step 17, cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [9, 16), k = 1..80 step 17, every m, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16, 24; k = 1..80 step 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16, 24; k = 1..80 step 17; cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_div_8_subtile) { 9660 TEST_REQUIRES_ARM_NEON; 9661 for (uint32_t n = 16; n <= 24; n += 8) { 9662 for (size_t k = 1; k <= 80; k += 17) { 9663 for (uint32_t m = 1; m <= 3; m++) { 9664 GemmMicrokernelTester() 9665 .mr(3) 9666 .nr(8) 9667 .kr(4) 9668 .sr(1) 9669 .m(m) 9670 .n(n) 9671 .k(k) 9672 .iterations(1) 9673 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9674 } 9675 } 9676 } 9677 } 9678 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,small_kernel)9679 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, small_kernel) { 9680 TEST_REQUIRES_ARM_NEON; 9681 for (size_t k = 1; k <= 80; k += 17) { 9682 GemmMicrokernelTester() 9683 .mr(3) 9684 .nr(8) 9685 .kr(4) 9686 .sr(1) 9687 .m(3) 9688 .n(8) 9689 .k(k) 9690 .ks(3) 9691 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9692 } 9693 } 9694 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,small_kernel_subtile)9695 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, small_kernel_subtile) { 9696 TEST_REQUIRES_ARM_NEON; 9697 for (size_t k = 1; k <= 80; k += 17) { 9698 for (uint32_t n = 1; n <= 8; n++) { 9699 for (uint32_t m = 1; m <= 3; m++) { 9700 GemmMicrokernelTester() 9701 .mr(3) 9702 .nr(8) 9703 .kr(4) 9704 .sr(1) 9705 .m(m) 9706 .n(n) 9707 .k(k) 9708 .ks(3) 9709 .iterations(1) 9710 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9711 } 9712 } 9713 } 9714 } 9715 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,n_gt_8_small_kernel)9716 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) { 9717 TEST_REQUIRES_ARM_NEON; 9718 for (uint32_t n = 9; n < 16; n++) { 9719 for (size_t k = 1; k <= 80; k += 17) { 9720 GemmMicrokernelTester() 9721 .mr(3) 9722 .nr(8) 9723 .kr(4) 9724 .sr(1) 9725 .m(3) 9726 .n(n) 9727 
.k(k) 9728 .ks(3) 9729 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9730 } 9731 } 9732 } 9733 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,n_div_8_small_kernel)9734 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) { 9735 TEST_REQUIRES_ARM_NEON; 9736 for (uint32_t n = 16; n <= 24; n += 8) { 9737 for (size_t k = 1; k <= 80; k += 17) { 9738 GemmMicrokernelTester() 9739 .mr(3) 9740 .nr(8) 9741 .kr(4) 9742 .sr(1) 9743 .m(3) 9744 .n(n) 9745 .k(k) 9746 .ks(3) 9747 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9748 } 9749 } 9750 } 9751 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,strided_cm_subtile)9752 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, strided_cm_subtile) { 9753 TEST_REQUIRES_ARM_NEON; 9754 for (size_t k = 1; k <= 80; k += 17) { 9755 for (uint32_t n = 1; n <= 8; n++) { 9756 for (uint32_t m = 1; m <= 3; m++) { 9757 GemmMicrokernelTester() 9758 .mr(3) 9759 .nr(8) 9760 .kr(4) 9761 .sr(1) 9762 .m(m) 9763 .n(n) 9764 .k(k) 9765 .cm_stride(11) 9766 .iterations(1) 9767 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9768 } 9769 } 9770 } 9771 } 9772 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,a_offset)9773 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, a_offset) { 9774 TEST_REQUIRES_ARM_NEON; 9775 for (size_t k = 1; k <= 80; k += 17) { 9776 GemmMicrokernelTester() 9777 .mr(3) 9778 .nr(8) 9779 .kr(4) 9780 .sr(1) 9781 .m(3) 9782 .n(8) 9783 .k(k) 9784 .ks(3) 9785 .a_offset(251) 9786 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9787 } 9788 } 9789 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,zero)9790 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, zero) { 9791 
TEST_REQUIRES_ARM_NEON; 9792 for (size_t k = 1; k <= 80; k += 17) { 9793 for (uint32_t mz = 0; mz < 3; mz++) { 9794 GemmMicrokernelTester() 9795 .mr(3) 9796 .nr(8) 9797 .kr(4) 9798 .sr(1) 9799 .m(3) 9800 .n(8) 9801 .k(k) 9802 .ks(3) 9803 .a_offset(251) 9804 .zero_index(mz) 9805 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9806 } 9807 } 9808 } 9809 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,qmin)9810 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, qmin) { 9811 TEST_REQUIRES_ARM_NEON; 9812 GemmMicrokernelTester() 9813 .mr(3) 9814 .nr(8) 9815 .kr(4) 9816 .sr(1) 9817 .m(3) 9818 .n(8) 9819 .k(16) 9820 .qmin(128) 9821 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9822 } 9823 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,qmax)9824 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, qmax) { 9825 TEST_REQUIRES_ARM_NEON; 9826 GemmMicrokernelTester() 9827 .mr(3) 9828 .nr(8) 9829 .kr(4) 9830 .sr(1) 9831 .m(3) 9832 .n(8) 9833 .k(16) 9834 .qmax(128) 9835 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9836 } 9837 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP,strided_cm)9838 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, strided_cm) { 9839 TEST_REQUIRES_ARM_NEON; 9840 GemmMicrokernelTester() 9841 .mr(3) 9842 .nr(8) 9843 .kr(4) 9844 .sr(1) 9845 .m(3) 9846 .n(8) 9847 .k(16) 9848 .cm_stride(11) 9849 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9850 } 9851 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 9852 9853 9854 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16)9855 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16) { 9856 TEST_REQUIRES_ARM_NEON; 9857 
GemmMicrokernelTester() 9858 .mr(1) 9859 .nr(16) 9860 .kr(4) 9861 .sr(1) 9862 .m(1) 9863 .n(16) 9864 .k(16) 9865 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9866 } 9867 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,strided_cn)9868 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, strided_cn) { 9869 TEST_REQUIRES_ARM_NEON; 9870 GemmMicrokernelTester() 9871 .mr(1) 9872 .nr(16) 9873 .kr(4) 9874 .sr(1) 9875 .m(1) 9876 .n(16) 9877 .k(16) 9878 .cn_stride(19) 9879 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9880 } 9881 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16_subtile)9882 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16_subtile) { 9883 TEST_REQUIRES_ARM_NEON; 9884 for (uint32_t n = 1; n <= 16; n++) { 9885 for (uint32_t m = 1; m <= 1; m++) { 9886 GemmMicrokernelTester() 9887 .mr(1) 9888 .nr(16) 9889 .kr(4) 9890 .sr(1) 9891 .m(m) 9892 .n(n) 9893 .k(16) 9894 .iterations(1) 9895 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9896 } 9897 } 9898 } 9899 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16_subtile_m)9900 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) { 9901 TEST_REQUIRES_ARM_NEON; 9902 for (uint32_t m = 1; m <= 1; m++) { 9903 GemmMicrokernelTester() 9904 .mr(1) 9905 .nr(16) 9906 .kr(4) 9907 .sr(1) 9908 .m(m) 9909 .n(16) 9910 .k(16) 9911 .iterations(1) 9912 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9913 } 9914 } 9915 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16_subtile_n)9916 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16_subtile_n) { 9917 TEST_REQUIRES_ARM_NEON; 9918 for (uint32_t n = 1; n <= 16; n++) { 9919 
GemmMicrokernelTester() 9920 .mr(1) 9921 .nr(16) 9922 .kr(4) 9923 .sr(1) 9924 .m(1) 9925 .n(n) 9926 .k(16) 9927 .iterations(1) 9928 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9929 } 9930 } 9931 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_lt_16)9932 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_lt_16) { 9933 TEST_REQUIRES_ARM_NEON; 9934 for (size_t k = 1; k < 16; k++) { 9935 GemmMicrokernelTester() 9936 .mr(1) 9937 .nr(16) 9938 .kr(4) 9939 .sr(1) 9940 .m(1) 9941 .n(16) 9942 .k(k) 9943 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9944 } 9945 } 9946 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_lt_16_subtile)9947 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_lt_16_subtile) { 9948 TEST_REQUIRES_ARM_NEON; 9949 for (size_t k = 1; k < 16; k++) { 9950 for (uint32_t n = 1; n <= 16; n++) { 9951 for (uint32_t m = 1; m <= 1; m++) { 9952 GemmMicrokernelTester() 9953 .mr(1) 9954 .nr(16) 9955 .kr(4) 9956 .sr(1) 9957 .m(m) 9958 .n(n) 9959 .k(k) 9960 .iterations(1) 9961 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9962 } 9963 } 9964 } 9965 } 9966 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_gt_16)9967 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_gt_16) { 9968 TEST_REQUIRES_ARM_NEON; 9969 for (size_t k = 17; k < 32; k++) { 9970 GemmMicrokernelTester() 9971 .mr(1) 9972 .nr(16) 9973 .kr(4) 9974 .sr(1) 9975 .m(1) 9976 .n(16) 9977 .k(k) 9978 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9979 } 9980 } 9981 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_gt_16_subtile)9982 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_gt_16_subtile) { 9983 TEST_REQUIRES_ARM_NEON; 9984 for (size_t k 
= 17; k < 32; k++) { 9985 for (uint32_t n = 1; n <= 16; n++) { 9986 for (uint32_t m = 1; m <= 1; m++) { 9987 GemmMicrokernelTester() 9988 .mr(1) 9989 .nr(16) 9990 .kr(4) 9991 .sr(1) 9992 .m(m) 9993 .n(n) 9994 .k(k) 9995 .iterations(1) 9996 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9997 } 9998 } 9999 } 10000 } 10001 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_div_16)10002 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_div_16) { 10003 TEST_REQUIRES_ARM_NEON; 10004 for (size_t k = 32; k <= 160; k += 16) { 10005 GemmMicrokernelTester() 10006 .mr(1) 10007 .nr(16) 10008 .kr(4) 10009 .sr(1) 10010 .m(1) 10011 .n(16) 10012 .k(k) 10013 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10014 } 10015 } 10016 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_div_16_subtile)10017 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_div_16_subtile) { 10018 TEST_REQUIRES_ARM_NEON; 10019 for (size_t k = 32; k <= 160; k += 16) { 10020 for (uint32_t n = 1; n <= 16; n++) { 10021 for (uint32_t m = 1; m <= 1; m++) { 10022 GemmMicrokernelTester() 10023 .mr(1) 10024 .nr(16) 10025 .kr(4) 10026 .sr(1) 10027 .m(m) 10028 .n(n) 10029 .k(k) 10030 .iterations(1) 10031 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10032 } 10033 } 10034 } 10035 } 10036 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_gt_16)10037 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16) { 10038 TEST_REQUIRES_ARM_NEON; 10039 for (uint32_t n = 17; n < 32; n++) { 10040 for (size_t k = 1; k <= 80; k += 17) { 10041 GemmMicrokernelTester() 10042 .mr(1) 10043 .nr(16) 10044 .kr(4) 10045 .sr(1) 10046 .m(1) 10047 .n(n) 10048 .k(k) 10049 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 10050 } 10051 } 10052 } 10053 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_gt_16_strided_cn)10054 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) { 10055 TEST_REQUIRES_ARM_NEON; 10056 for (uint32_t n = 17; n < 32; n++) { 10057 for (size_t k = 1; k <= 80; k += 17) { 10058 GemmMicrokernelTester() 10059 .mr(1) 10060 .nr(16) 10061 .kr(4) 10062 .sr(1) 10063 .m(1) 10064 .n(n) 10065 .k(k) 10066 .cn_stride(19) 10067 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10068 } 10069 } 10070 } 10071 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_gt_16_subtile)10072 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16_subtile) { 10073 TEST_REQUIRES_ARM_NEON; 10074 for (uint32_t n = 17; n < 32; n++) { 10075 for (size_t k = 1; k <= 80; k += 17) { 10076 for (uint32_t m = 1; m <= 1; m++) { 10077 GemmMicrokernelTester() 10078 .mr(1) 10079 .nr(16) 10080 .kr(4) 10081 .sr(1) 10082 .m(m) 10083 .n(n) 10084 .k(k) 10085 .iterations(1) 10086 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10087 } 10088 } 10089 } 10090 } 10091 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16)10092 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16) { 10093 TEST_REQUIRES_ARM_NEON; 10094 for (uint32_t n = 32; n <= 48; n += 16) { 10095 for (size_t k = 1; k <= 80; k += 17) { 10096 GemmMicrokernelTester() 10097 .mr(1) 10098 .nr(16) 10099 .kr(4) 10100 .sr(1) 10101 .m(1) 10102 .n(n) 10103 .k(k) 10104 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10105 } 10106 } 10107 } 10108 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16_strided_cn)10109 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) { 10110 TEST_REQUIRES_ARM_NEON; 10111 for (uint32_t n = 
32; n <= 48; n += 16) { 10112 for (size_t k = 1; k <= 80; k += 17) { 10113 GemmMicrokernelTester() 10114 .mr(1) 10115 .nr(16) 10116 .kr(4) 10117 .sr(1) 10118 .m(1) 10119 .n(n) 10120 .k(k) 10121 .cn_stride(19) 10122 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10123 } 10124 } 10125 } 10126 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16_subtile)10127 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16_subtile) { 10128 TEST_REQUIRES_ARM_NEON; 10129 for (uint32_t n = 32; n <= 48; n += 16) { 10130 for (size_t k = 1; k <= 80; k += 17) { 10131 for (uint32_t m = 1; m <= 1; m++) { 10132 GemmMicrokernelTester() 10133 .mr(1) 10134 .nr(16) 10135 .kr(4) 10136 .sr(1) 10137 .m(m) 10138 .n(n) 10139 .k(k) 10140 .iterations(1) 10141 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10142 } 10143 } 10144 } 10145 } 10146 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,small_kernel)10147 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, small_kernel) { 10148 TEST_REQUIRES_ARM_NEON; 10149 for (size_t k = 1; k <= 80; k += 17) { 10150 GemmMicrokernelTester() 10151 .mr(1) 10152 .nr(16) 10153 .kr(4) 10154 .sr(1) 10155 .m(1) 10156 .n(16) 10157 .k(k) 10158 .ks(3) 10159 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10160 } 10161 } 10162 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,small_kernel_subtile)10163 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, small_kernel_subtile) { 10164 TEST_REQUIRES_ARM_NEON; 10165 for (size_t k = 1; k <= 80; k += 17) { 10166 for (uint32_t n = 1; n <= 16; n++) { 10167 for (uint32_t m = 1; m <= 1; m++) { 10168 GemmMicrokernelTester() 10169 .mr(1) 10170 .nr(16) 10171 .kr(4) 10172 .sr(1) 10173 .m(m) 10174 .n(n) 10175 .k(k) 10176 .ks(3) 10177 .iterations(1) 10178 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10179 } 10180 } 10181 } 10182 } 10183 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_gt_16_small_kernel)10184 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16_small_kernel) { 10185 TEST_REQUIRES_ARM_NEON; 10186 for (uint32_t n = 17; n < 32; n++) { 10187 for (size_t k = 1; k <= 80; k += 17) { 10188 GemmMicrokernelTester() 10189 .mr(1) 10190 .nr(16) 10191 .kr(4) 10192 .sr(1) 10193 .m(1) 10194 .n(n) 10195 .k(k) 10196 .ks(3) 10197 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10198 } 10199 } 10200 } 10201 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16_small_kernel)10202 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16_small_kernel) { 10203 TEST_REQUIRES_ARM_NEON; 10204 for (uint32_t n = 32; n <= 48; n += 16) { 10205 for (size_t k = 1; k <= 80; k += 17) { 10206 GemmMicrokernelTester() 10207 .mr(1) 10208 .nr(16) 10209 .kr(4) 10210 .sr(1) 10211 .m(1) 10212 .n(n) 10213 .k(k) 10214 .ks(3) 10215 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10216 } 10217 } 10218 } 10219 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,strided_cm_subtile)10220 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, strided_cm_subtile) { 10221 TEST_REQUIRES_ARM_NEON; 10222 for (size_t k = 1; k <= 80; k += 17) { 10223 for (uint32_t n = 1; n <= 16; n++) { 10224 for (uint32_t m = 1; m <= 1; m++) { 10225 GemmMicrokernelTester() 10226 .mr(1) 10227 .nr(16) 10228 .kr(4) 10229 .sr(1) 10230 .m(m) 10231 .n(n) 10232 .k(k) 10233 .cm_stride(19) 10234 .iterations(1) 10235 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10236 } 10237 } 10238 } 10239 } 10240 
// k = 1..80 in steps of 17 with ks(3) and a_offset(83), m = 1, n = 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    auto tester = GemmMicrokernelTester().mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k_val).ks(3).a_offset(83);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same configuration as a_offset, additionally passing zero_index(0).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester().mr(1).nr(16).kr(4).sr(1)
          .m(1).n(16).k(k_val).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Single run at m = 1, n = 16, k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(16).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Single run at m = 1, n = 16, k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(16).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,strided_cm)10306 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, strided_cm) { 10307 TEST_REQUIRES_ARM_NEON; 10308 GemmMicrokernelTester() 10309 .mr(1) 10310 .nr(16) 10311 .kr(4) 10312 .sr(1) 10313 .m(1) 10314 .n(16) 10315 .k(16) 10316 .cm_stride(19) 10317 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10318 } 10319 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 10320 10321 10322 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R,k_eq_8)10323 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8) { 10324 TEST_REQUIRES_ARM_NEON; 10325 GemmMicrokernelTester() 10326 .mr(1) 10327 .nr(8) 10328 .kr(4) 10329 .sr(1) 10330 .m(1) 10331 .n(8) 10332 .k(8) 10333 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10334 } 10335 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R,strided_cn)10336 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cn) { 10337 TEST_REQUIRES_ARM_NEON; 10338 GemmMicrokernelTester() 10339 .mr(1) 10340 .nr(8) 10341 .kr(4) 10342 .sr(1) 10343 .m(1) 10344 .n(8) 10345 .k(8) 10346 .cn_stride(11) 10347 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10348 } 10349 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R,k_eq_8_subtile)10350 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile) { 10351 TEST_REQUIRES_ARM_NEON; 10352 for (uint32_t n = 1; n <= 8; n++) { 10353 for (uint32_t m = 1; m <= 1; m++) { 10354 GemmMicrokernelTester() 10355 .mr(1) 10356 .nr(8) 10357 .kr(4) 10358 .sr(1) 10359 .m(m) 10360 .n(n) 10361 .k(8) 10362 .iterations(1) 10363 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10364 } 10365 } 10366 } 10367 
// k = 8, n = 8, looping m over [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
        .m(m_val).n(8).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 8, m = 1, n in [1, 8]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(8).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in [1, 7] with m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in [1, 7] crossed with n in [1, 8] and m = 1; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k in [9, 15] with m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in [9, 15] crossed with n in [1, 8] and m = 1; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 16..80 in steps of 8 with m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16..80 in steps of 8 crossed with n in [1, 8] and m = 1; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [9, 15] crossed with k = 1..40 in steps of 9, m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_gt_8, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_gt_8, additionally looping m over [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16 and 24 crossed with k = 1..40 in steps of 9, m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_div_8, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// As n_div_8, additionally looping m over [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1..40 in steps of 9 with ks(3), m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// As small_kernel, crossed with n in [1, 8] and m = 1; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [9, 15] crossed with k = 1..40 in steps of 9, with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24 crossed with k = 1..40 in steps of 9, with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 1..40 in steps of 9, n in [1, 8], m = 1, with cm_stride(11); one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val).cm_stride(11).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1..40 in steps of 9 with ks(3) and a_offset(43), m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val).ks(3).a_offset(43);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same configuration as a_offset, additionally passing zero_index(0).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester().mr(1).nr(8).kr(4).sr(1)
          .m(1).n(8).k(k_val).ks(3).a_offset(43).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, qmin) { 10747 TEST_REQUIRES_ARM_NEON; 10748 GemmMicrokernelTester() 10749 .mr(1) 10750 .nr(8) 10751 .kr(4) 10752 .sr(1) 10753 .m(1) 10754 .n(8) 10755 .k(8) 10756 .qmin(128) 10757 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10758 } 10759 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R,qmax)10760 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, qmax) { 10761 TEST_REQUIRES_ARM_NEON; 10762 GemmMicrokernelTester() 10763 .mr(1) 10764 .nr(8) 10765 .kr(4) 10766 .sr(1) 10767 .m(1) 10768 .n(8) 10769 .k(8) 10770 .qmax(128) 10771 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10772 } 10773 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R,strided_cm)10774 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD1R, strided_cm) { 10775 TEST_REQUIRES_ARM_NEON; 10776 GemmMicrokernelTester() 10777 .mr(1) 10778 .nr(8) 10779 .kr(4) 10780 .sr(1) 10781 .m(1) 10782 .n(8) 10783 .k(8) 10784 .cm_stride(11) 10785 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10786 } 10787 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 10788 10789 10790 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,k_eq_16)10791 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16) { 10792 TEST_REQUIRES_ARM_NEON; 10793 GemmMicrokernelTester() 10794 .mr(1) 10795 .nr(8) 10796 .kr(4) 10797 .sr(1) 10798 .m(1) 10799 .n(8) 10800 .k(16) 10801 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10802 } 10803 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,strided_cn)10804 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cn) { 10805 TEST_REQUIRES_ARM_NEON; 10806 GemmMicrokernelTester() 10807 
.mr(1) 10808 .nr(8) 10809 .kr(4) 10810 .sr(1) 10811 .m(1) 10812 .n(8) 10813 .k(16) 10814 .cn_stride(11) 10815 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10816 } 10817 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,k_eq_16_subtile)10818 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) { 10819 TEST_REQUIRES_ARM_NEON; 10820 for (uint32_t n = 1; n <= 8; n++) { 10821 for (uint32_t m = 1; m <= 1; m++) { 10822 GemmMicrokernelTester() 10823 .mr(1) 10824 .nr(8) 10825 .kr(4) 10826 .sr(1) 10827 .m(m) 10828 .n(n) 10829 .k(16) 10830 .iterations(1) 10831 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10832 } 10833 } 10834 } 10835 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,k_eq_16_subtile_m)10836 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 10837 TEST_REQUIRES_ARM_NEON; 10838 for (uint32_t m = 1; m <= 1; m++) { 10839 GemmMicrokernelTester() 10840 .mr(1) 10841 .nr(8) 10842 .kr(4) 10843 .sr(1) 10844 .m(m) 10845 .n(8) 10846 .k(16) 10847 .iterations(1) 10848 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10849 } 10850 } 10851 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,k_eq_16_subtile_n)10852 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) { 10853 TEST_REQUIRES_ARM_NEON; 10854 for (uint32_t n = 1; n <= 8; n++) { 10855 GemmMicrokernelTester() 10856 .mr(1) 10857 .nr(8) 10858 .kr(4) 10859 .sr(1) 10860 .m(1) 10861 .n(n) 10862 .k(16) 10863 .iterations(1) 10864 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10865 } 10866 } 10867 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,k_lt_16)10868 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_lt_16) { 10869 
TEST_REQUIRES_ARM_NEON; 10870 for (size_t k = 1; k < 16; k++) { 10871 GemmMicrokernelTester() 10872 .mr(1) 10873 .nr(8) 10874 .kr(4) 10875 .sr(1) 10876 .m(1) 10877 .n(8) 10878 .k(k) 10879 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10880 } 10881 } 10882 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,k_lt_16_subtile)10883 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) { 10884 TEST_REQUIRES_ARM_NEON; 10885 for (size_t k = 1; k < 16; k++) { 10886 for (uint32_t n = 1; n <= 8; n++) { 10887 for (uint32_t m = 1; m <= 1; m++) { 10888 GemmMicrokernelTester() 10889 .mr(1) 10890 .nr(8) 10891 .kr(4) 10892 .sr(1) 10893 .m(m) 10894 .n(n) 10895 .k(k) 10896 .iterations(1) 10897 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10898 } 10899 } 10900 } 10901 } 10902 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,k_gt_16)10903 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_gt_16) { 10904 TEST_REQUIRES_ARM_NEON; 10905 for (size_t k = 17; k < 32; k++) { 10906 GemmMicrokernelTester() 10907 .mr(1) 10908 .nr(8) 10909 .kr(4) 10910 .sr(1) 10911 .m(1) 10912 .n(8) 10913 .k(k) 10914 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10915 } 10916 } 10917 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,k_gt_16_subtile)10918 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) { 10919 TEST_REQUIRES_ARM_NEON; 10920 for (size_t k = 17; k < 32; k++) { 10921 for (uint32_t n = 1; n <= 8; n++) { 10922 for (uint32_t m = 1; m <= 1; m++) { 10923 GemmMicrokernelTester() 10924 .mr(1) 10925 .nr(8) 10926 .kr(4) 10927 .sr(1) 10928 .m(m) 10929 .n(n) 10930 .k(k) 10931 .iterations(1) 10932 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 10933 } 10934 } 10935 } 10936 } 10937 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,k_div_16)10938 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_div_16) { 10939 TEST_REQUIRES_ARM_NEON; 10940 for (size_t k = 32; k <= 160; k += 16) { 10941 GemmMicrokernelTester() 10942 .mr(1) 10943 .nr(8) 10944 .kr(4) 10945 .sr(1) 10946 .m(1) 10947 .n(8) 10948 .k(k) 10949 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10950 } 10951 } 10952 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,k_div_16_subtile)10953 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, k_div_16_subtile) { 10954 TEST_REQUIRES_ARM_NEON; 10955 for (size_t k = 32; k <= 160; k += 16) { 10956 for (uint32_t n = 1; n <= 8; n++) { 10957 for (uint32_t m = 1; m <= 1; m++) { 10958 GemmMicrokernelTester() 10959 .mr(1) 10960 .nr(8) 10961 .kr(4) 10962 .sr(1) 10963 .m(m) 10964 .n(n) 10965 .k(k) 10966 .iterations(1) 10967 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10968 } 10969 } 10970 } 10971 } 10972 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,n_gt_8)10973 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8) { 10974 TEST_REQUIRES_ARM_NEON; 10975 for (uint32_t n = 9; n < 16; n++) { 10976 for (size_t k = 1; k <= 80; k += 17) { 10977 GemmMicrokernelTester() 10978 .mr(1) 10979 .nr(8) 10980 .kr(4) 10981 .sr(1) 10982 .m(1) 10983 .n(n) 10984 .k(k) 10985 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10986 } 10987 } 10988 } 10989 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,n_gt_8_strided_cn)10990 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) { 10991 TEST_REQUIRES_ARM_NEON; 10992 for (uint32_t n = 9; n < 16; n++) { 10993 for (size_t k = 1; k <= 80; k += 17) { 10994 GemmMicrokernelTester() 
10995 .mr(1) 10996 .nr(8) 10997 .kr(4) 10998 .sr(1) 10999 .m(1) 11000 .n(n) 11001 .k(k) 11002 .cn_stride(11) 11003 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11004 } 11005 } 11006 } 11007 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,n_gt_8_subtile)11008 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) { 11009 TEST_REQUIRES_ARM_NEON; 11010 for (uint32_t n = 9; n < 16; n++) { 11011 for (size_t k = 1; k <= 80; k += 17) { 11012 for (uint32_t m = 1; m <= 1; m++) { 11013 GemmMicrokernelTester() 11014 .mr(1) 11015 .nr(8) 11016 .kr(4) 11017 .sr(1) 11018 .m(m) 11019 .n(n) 11020 .k(k) 11021 .iterations(1) 11022 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11023 } 11024 } 11025 } 11026 } 11027 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,n_div_8)11028 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8) { 11029 TEST_REQUIRES_ARM_NEON; 11030 for (uint32_t n = 16; n <= 24; n += 8) { 11031 for (size_t k = 1; k <= 80; k += 17) { 11032 GemmMicrokernelTester() 11033 .mr(1) 11034 .nr(8) 11035 .kr(4) 11036 .sr(1) 11037 .m(1) 11038 .n(n) 11039 .k(k) 11040 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11041 } 11042 } 11043 } 11044 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,n_div_8_strided_cn)11045 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) { 11046 TEST_REQUIRES_ARM_NEON; 11047 for (uint32_t n = 16; n <= 24; n += 8) { 11048 for (size_t k = 1; k <= 80; k += 17) { 11049 GemmMicrokernelTester() 11050 .mr(1) 11051 .nr(8) 11052 .kr(4) 11053 .sr(1) 11054 .m(1) 11055 .n(n) 11056 .k(k) 11057 .cn_stride(11) 11058 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11059 } 11060 } 
11061 } 11062 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,n_div_8_subtile)11063 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_subtile) { 11064 TEST_REQUIRES_ARM_NEON; 11065 for (uint32_t n = 16; n <= 24; n += 8) { 11066 for (size_t k = 1; k <= 80; k += 17) { 11067 for (uint32_t m = 1; m <= 1; m++) { 11068 GemmMicrokernelTester() 11069 .mr(1) 11070 .nr(8) 11071 .kr(4) 11072 .sr(1) 11073 .m(m) 11074 .n(n) 11075 .k(k) 11076 .iterations(1) 11077 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11078 } 11079 } 11080 } 11081 } 11082 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,small_kernel)11083 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, small_kernel) { 11084 TEST_REQUIRES_ARM_NEON; 11085 for (size_t k = 1; k <= 80; k += 17) { 11086 GemmMicrokernelTester() 11087 .mr(1) 11088 .nr(8) 11089 .kr(4) 11090 .sr(1) 11091 .m(1) 11092 .n(8) 11093 .k(k) 11094 .ks(3) 11095 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11096 } 11097 } 11098 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,small_kernel_subtile)11099 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, small_kernel_subtile) { 11100 TEST_REQUIRES_ARM_NEON; 11101 for (size_t k = 1; k <= 80; k += 17) { 11102 for (uint32_t n = 1; n <= 8; n++) { 11103 for (uint32_t m = 1; m <= 1; m++) { 11104 GemmMicrokernelTester() 11105 .mr(1) 11106 .nr(8) 11107 .kr(4) 11108 .sr(1) 11109 .m(m) 11110 .n(n) 11111 .k(k) 11112 .ks(3) 11113 .iterations(1) 11114 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11115 } 11116 } 11117 } 11118 } 11119 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,n_gt_8_small_kernel)11120 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) { 11121 TEST_REQUIRES_ARM_NEON; 11122 for (uint32_t n = 9; n < 16; 
n++) { 11123 for (size_t k = 1; k <= 80; k += 17) { 11124 GemmMicrokernelTester() 11125 .mr(1) 11126 .nr(8) 11127 .kr(4) 11128 .sr(1) 11129 .m(1) 11130 .n(n) 11131 .k(k) 11132 .ks(3) 11133 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11134 } 11135 } 11136 } 11137 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,n_div_8_small_kernel)11138 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) { 11139 TEST_REQUIRES_ARM_NEON; 11140 for (uint32_t n = 16; n <= 24; n += 8) { 11141 for (size_t k = 1; k <= 80; k += 17) { 11142 GemmMicrokernelTester() 11143 .mr(1) 11144 .nr(8) 11145 .kr(4) 11146 .sr(1) 11147 .m(1) 11148 .n(n) 11149 .k(k) 11150 .ks(3) 11151 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11152 } 11153 } 11154 } 11155 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,strided_cm_subtile)11156 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cm_subtile) { 11157 TEST_REQUIRES_ARM_NEON; 11158 for (size_t k = 1; k <= 80; k += 17) { 11159 for (uint32_t n = 1; n <= 8; n++) { 11160 for (uint32_t m = 1; m <= 1; m++) { 11161 GemmMicrokernelTester() 11162 .mr(1) 11163 .nr(8) 11164 .kr(4) 11165 .sr(1) 11166 .m(m) 11167 .n(n) 11168 .k(k) 11169 .cm_stride(11) 11170 .iterations(1) 11171 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11172 } 11173 } 11174 } 11175 } 11176 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,a_offset)11177 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, a_offset) { 11178 TEST_REQUIRES_ARM_NEON; 11179 for (size_t k = 1; k <= 80; k += 17) { 11180 GemmMicrokernelTester() 11181 .mr(1) 11182 .nr(8) 11183 .kr(4) 11184 .sr(1) 11185 .m(1) 11186 .n(8) 11187 .k(k) 11188 .ks(3) 11189 .a_offset(83) 11190 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11191 } 11192 } 11193 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,zero)11194 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, zero) { 11195 TEST_REQUIRES_ARM_NEON; 11196 for (size_t k = 1; k <= 80; k += 17) { 11197 for (uint32_t mz = 0; mz < 1; mz++) { 11198 GemmMicrokernelTester() 11199 .mr(1) 11200 .nr(8) 11201 .kr(4) 11202 .sr(1) 11203 .m(1) 11204 .n(8) 11205 .k(k) 11206 .ks(3) 11207 .a_offset(83) 11208 .zero_index(mz) 11209 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11210 } 11211 } 11212 } 11213 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,qmin)11214 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, qmin) { 11215 TEST_REQUIRES_ARM_NEON; 11216 GemmMicrokernelTester() 11217 .mr(1) 11218 .nr(8) 11219 .kr(4) 11220 .sr(1) 11221 .m(1) 11222 .n(8) 11223 .k(16) 11224 .qmin(128) 11225 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11226 } 11227 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,qmax)11228 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, qmax) { 11229 TEST_REQUIRES_ARM_NEON; 11230 GemmMicrokernelTester() 11231 .mr(1) 11232 .nr(8) 11233 .kr(4) 11234 .sr(1) 11235 .m(1) 11236 .n(8) 11237 .k(16) 11238 .qmax(128) 11239 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11240 } 11241 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R,strided_cm)11242 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD1R, strided_cm) { 11243 TEST_REQUIRES_ARM_NEON; 11244 GemmMicrokernelTester() 11245 .mr(1) 11246 .nr(8) 11247 .kr(4) 11248 .sr(1) 11249 .m(1) 11250 .n(8) 11251 .k(16) 11252 .cm_stride(11) 11253 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 11254 } 11255 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 11256 11257 11258 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,k_eq_8)11259 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, k_eq_8) { 11260 TEST_REQUIRES_ARM_NEON; 11261 GemmMicrokernelTester() 11262 .mr(2) 11263 .nr(8) 11264 .kr(4) 11265 .sr(1) 11266 .m(2) 11267 .n(8) 11268 .k(8) 11269 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11270 } 11271 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,strided_cn)11272 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, strided_cn) { 11273 TEST_REQUIRES_ARM_NEON; 11274 GemmMicrokernelTester() 11275 .mr(2) 11276 .nr(8) 11277 .kr(4) 11278 .sr(1) 11279 .m(2) 11280 .n(8) 11281 .k(8) 11282 .cn_stride(11) 11283 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11284 } 11285 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,k_eq_8_subtile)11286 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, k_eq_8_subtile) { 11287 TEST_REQUIRES_ARM_NEON; 11288 for (uint32_t n = 1; n <= 8; n++) { 11289 for (uint32_t m = 1; m <= 2; m++) { 11290 GemmMicrokernelTester() 11291 .mr(2) 11292 .nr(8) 11293 .kr(4) 11294 .sr(1) 11295 .m(m) 11296 .n(n) 11297 .k(8) 11298 .iterations(1) 11299 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11300 } 11301 } 11302 } 11303 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,k_eq_8_subtile_m)11304 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, k_eq_8_subtile_m) { 11305 TEST_REQUIRES_ARM_NEON; 11306 for (uint32_t m = 1; m <= 2; m++) { 11307 GemmMicrokernelTester() 11308 .mr(2) 11309 .nr(8) 11310 .kr(4) 11311 .sr(1) 11312 .m(m) 11313 .n(8) 11314 .k(8) 11315 .iterations(1) 11316 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11317 } 11318 } 11319 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,k_eq_8_subtile_n)11320 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, k_eq_8_subtile_n) { 11321 TEST_REQUIRES_ARM_NEON; 11322 for (uint32_t n = 1; n <= 8; n++) { 11323 GemmMicrokernelTester() 11324 .mr(2) 11325 .nr(8) 11326 .kr(4) 11327 .sr(1) 11328 .m(2) 11329 .n(n) 11330 .k(8) 11331 .iterations(1) 11332 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11333 } 11334 } 11335 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,k_lt_8)11336 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, k_lt_8) { 11337 TEST_REQUIRES_ARM_NEON; 11338 for (size_t k = 1; k < 8; k++) { 11339 GemmMicrokernelTester() 11340 .mr(2) 11341 .nr(8) 11342 .kr(4) 11343 .sr(1) 11344 .m(2) 11345 .n(8) 11346 .k(k) 11347 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11348 } 11349 } 11350 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,k_lt_8_subtile)11351 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, k_lt_8_subtile) { 11352 TEST_REQUIRES_ARM_NEON; 11353 for (size_t k = 1; k < 8; k++) { 11354 for (uint32_t n = 1; n <= 8; n++) { 11355 for (uint32_t m = 1; m <= 2; m++) { 11356 GemmMicrokernelTester() 11357 .mr(2) 11358 .nr(8) 11359 .kr(4) 11360 .sr(1) 11361 .m(m) 11362 .n(n) 11363 .k(k) 11364 .iterations(1) 11365 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11366 } 11367 } 11368 } 11369 } 11370 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,k_gt_8)11371 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, k_gt_8) { 11372 TEST_REQUIRES_ARM_NEON; 11373 for (size_t k = 9; k < 16; k++) { 11374 GemmMicrokernelTester() 11375 .mr(2) 
11376 .nr(8) 11377 .kr(4) 11378 .sr(1) 11379 .m(2) 11380 .n(8) 11381 .k(k) 11382 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11383 } 11384 } 11385 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,k_gt_8_subtile)11386 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, k_gt_8_subtile) { 11387 TEST_REQUIRES_ARM_NEON; 11388 for (size_t k = 9; k < 16; k++) { 11389 for (uint32_t n = 1; n <= 8; n++) { 11390 for (uint32_t m = 1; m <= 2; m++) { 11391 GemmMicrokernelTester() 11392 .mr(2) 11393 .nr(8) 11394 .kr(4) 11395 .sr(1) 11396 .m(m) 11397 .n(n) 11398 .k(k) 11399 .iterations(1) 11400 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11401 } 11402 } 11403 } 11404 } 11405 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,k_div_8)11406 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, k_div_8) { 11407 TEST_REQUIRES_ARM_NEON; 11408 for (size_t k = 16; k <= 80; k += 8) { 11409 GemmMicrokernelTester() 11410 .mr(2) 11411 .nr(8) 11412 .kr(4) 11413 .sr(1) 11414 .m(2) 11415 .n(8) 11416 .k(k) 11417 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11418 } 11419 } 11420 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,k_div_8_subtile)11421 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, k_div_8_subtile) { 11422 TEST_REQUIRES_ARM_NEON; 11423 for (size_t k = 16; k <= 80; k += 8) { 11424 for (uint32_t n = 1; n <= 8; n++) { 11425 for (uint32_t m = 1; m <= 2; m++) { 11426 GemmMicrokernelTester() 11427 .mr(2) 11428 .nr(8) 11429 .kr(4) 11430 .sr(1) 11431 .m(m) 11432 .n(n) 11433 .k(k) 11434 .iterations(1) 11435 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11436 } 11437 } 11438 } 11439 } 11440 
// n in [9, 15] with k in {1, 10, 19, 28, 37}; full m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_8, additionally setting cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_8, also iterating m over {1, 2}; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in {16, 24} with k in {1, 10, 19, 28, 37}; full m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_div_8, additionally setting cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_div_8, also iterating m over {1, 2}; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile with ks(3); k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
// ks(3) with every (m, n) in [1, 2] x [1, 8]; k in {1, 10, 19, 28, 37}; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n in [9, 15]; k in {1, 10, 19, 28, 37}; full m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks(3) with n in {16, 24}; k in {1, 10, 19, 28, 37}; full m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride(11) with every (m, n) in [1, 2] x [1, 8]; k in {1, 10, 19, 28, 37}; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile with ks(3) and a_offset(83); k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// As a_offset, additionally setting zero_index(mz) for each mz in {0, 1}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Full 2x8 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,qmax)11696 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, qmax) { 11697 TEST_REQUIRES_ARM_NEON; 11698 GemmMicrokernelTester() 11699 .mr(2) 11700 .nr(8) 11701 .kr(4) 11702 .sr(1) 11703 .m(2) 11704 .n(8) 11705 .k(8) 11706 .qmax(128) 11707 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11708 } 11709 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R,strided_cm)11710 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD2R, strided_cm) { 11711 TEST_REQUIRES_ARM_NEON; 11712 GemmMicrokernelTester() 11713 .mr(2) 11714 .nr(8) 11715 .kr(4) 11716 .sr(1) 11717 .m(2) 11718 .n(8) 11719 .k(8) 11720 .cm_stride(11) 11721 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11722 } 11723 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 11724 11725 11726 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,k_eq_16)11727 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, k_eq_16) { 11728 TEST_REQUIRES_ARM_NEON; 11729 GemmMicrokernelTester() 11730 .mr(2) 11731 .nr(8) 11732 .kr(4) 11733 .sr(1) 11734 .m(2) 11735 .n(8) 11736 .k(16) 11737 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11738 } 11739 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,strided_cn)11740 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, strided_cn) { 11741 TEST_REQUIRES_ARM_NEON; 11742 GemmMicrokernelTester() 11743 .mr(2) 11744 .nr(8) 11745 .kr(4) 11746 .sr(1) 11747 .m(2) 11748 .n(8) 11749 .k(16) 11750 .cn_stride(11) 11751 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11752 } 11753 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,k_eq_16_subtile)11754 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, 
k_eq_16_subtile) { 11755 TEST_REQUIRES_ARM_NEON; 11756 for (uint32_t n = 1; n <= 8; n++) { 11757 for (uint32_t m = 1; m <= 2; m++) { 11758 GemmMicrokernelTester() 11759 .mr(2) 11760 .nr(8) 11761 .kr(4) 11762 .sr(1) 11763 .m(m) 11764 .n(n) 11765 .k(16) 11766 .iterations(1) 11767 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11768 } 11769 } 11770 } 11771 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,k_eq_16_subtile_m)11772 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 11773 TEST_REQUIRES_ARM_NEON; 11774 for (uint32_t m = 1; m <= 2; m++) { 11775 GemmMicrokernelTester() 11776 .mr(2) 11777 .nr(8) 11778 .kr(4) 11779 .sr(1) 11780 .m(m) 11781 .n(8) 11782 .k(16) 11783 .iterations(1) 11784 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11785 } 11786 } 11787 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,k_eq_16_subtile_n)11788 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 11789 TEST_REQUIRES_ARM_NEON; 11790 for (uint32_t n = 1; n <= 8; n++) { 11791 GemmMicrokernelTester() 11792 .mr(2) 11793 .nr(8) 11794 .kr(4) 11795 .sr(1) 11796 .m(2) 11797 .n(n) 11798 .k(16) 11799 .iterations(1) 11800 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11801 } 11802 } 11803 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,k_lt_16)11804 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, k_lt_16) { 11805 TEST_REQUIRES_ARM_NEON; 11806 for (size_t k = 1; k < 16; k++) { 11807 GemmMicrokernelTester() 11808 .mr(2) 11809 .nr(8) 11810 .kr(4) 11811 .sr(1) 11812 .m(2) 11813 .n(8) 11814 .k(k) 11815 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11816 } 11817 } 11818 
// Every k in [1, 15] with every (m, n) in [1, 2] x [1, 8]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every k in [17, 31] with every (m, n) in [1, 2] x [1, 8]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile for k = 32, 48, ..., 160.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 32, 48, ..., 160 with every (m, n) in [1, 2] x [1, 8]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [9, 15] with k in {1, 18, 35, 52, 69}; full m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_8, additionally setting cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
// n in [9, 15], k in {1, 18, 35, 52, 69}, m in {1, 2}; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in {16, 24} with k in {1, 18, 35, 52, 69}; full m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_div_8, additionally setting cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_div_8, also iterating m over {1, 2}; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile with ks(3); k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks(3) with every (m, n) in [1, 2] x [1, 8]; k in {1, 18, 35, 52, 69}; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n in [9, 15]; k in {1, 18, 35, 52, 69}; full m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
12073 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,n_div_8_small_kernel)12074 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) { 12075 TEST_REQUIRES_ARM_NEON; 12076 for (uint32_t n = 16; n <= 24; n += 8) { 12077 for (size_t k = 1; k <= 80; k += 17) { 12078 GemmMicrokernelTester() 12079 .mr(2) 12080 .nr(8) 12081 .kr(4) 12082 .sr(1) 12083 .m(2) 12084 .n(n) 12085 .k(k) 12086 .ks(3) 12087 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12088 } 12089 } 12090 } 12091 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,strided_cm_subtile)12092 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, strided_cm_subtile) { 12093 TEST_REQUIRES_ARM_NEON; 12094 for (size_t k = 1; k <= 80; k += 17) { 12095 for (uint32_t n = 1; n <= 8; n++) { 12096 for (uint32_t m = 1; m <= 2; m++) { 12097 GemmMicrokernelTester() 12098 .mr(2) 12099 .nr(8) 12100 .kr(4) 12101 .sr(1) 12102 .m(m) 12103 .n(n) 12104 .k(k) 12105 .cm_stride(11) 12106 .iterations(1) 12107 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12108 } 12109 } 12110 } 12111 } 12112 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,a_offset)12113 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, a_offset) { 12114 TEST_REQUIRES_ARM_NEON; 12115 for (size_t k = 1; k <= 80; k += 17) { 12116 GemmMicrokernelTester() 12117 .mr(2) 12118 .nr(8) 12119 .kr(4) 12120 .sr(1) 12121 .m(2) 12122 .n(8) 12123 .k(k) 12124 .ks(3) 12125 .a_offset(163) 12126 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12127 } 12128 } 12129 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,zero)12130 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, zero) { 12131 TEST_REQUIRES_ARM_NEON; 12132 for (size_t k = 1; k <= 80; k += 17) { 12133 for (uint32_t mz = 0; mz < 2; mz++) { 12134 
GemmMicrokernelTester() 12135 .mr(2) 12136 .nr(8) 12137 .kr(4) 12138 .sr(1) 12139 .m(2) 12140 .n(8) 12141 .k(k) 12142 .ks(3) 12143 .a_offset(163) 12144 .zero_index(mz) 12145 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12146 } 12147 } 12148 } 12149 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,qmin)12150 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, qmin) { 12151 TEST_REQUIRES_ARM_NEON; 12152 GemmMicrokernelTester() 12153 .mr(2) 12154 .nr(8) 12155 .kr(4) 12156 .sr(1) 12157 .m(2) 12158 .n(8) 12159 .k(16) 12160 .qmin(128) 12161 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12162 } 12163 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,qmax)12164 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, qmax) { 12165 TEST_REQUIRES_ARM_NEON; 12166 GemmMicrokernelTester() 12167 .mr(2) 12168 .nr(8) 12169 .kr(4) 12170 .sr(1) 12171 .m(2) 12172 .n(8) 12173 .k(16) 12174 .qmax(128) 12175 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12176 } 12177 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R,strided_cm)12178 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD2R, strided_cm) { 12179 TEST_REQUIRES_ARM_NEON; 12180 GemmMicrokernelTester() 12181 .mr(2) 12182 .nr(8) 12183 .kr(4) 12184 .sr(1) 12185 .m(2) 12186 .n(8) 12187 .k(16) 12188 .cm_stride(11) 12189 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12190 } 12191 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 12192 12193 12194 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,k_eq_8)12195 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8) { 12196 TEST_REQUIRES_ARM_NEON; 12197 GemmMicrokernelTester() 12198 .mr(4) 12199 .nr(8) 12200 .kr(2) 12201 
.sr(1) 12202 .m(4) 12203 .n(8) 12204 .k(8) 12205 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12206 } 12207 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,strided_cn)12208 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cn) { 12209 TEST_REQUIRES_ARM_NEON; 12210 GemmMicrokernelTester() 12211 .mr(4) 12212 .nr(8) 12213 .kr(2) 12214 .sr(1) 12215 .m(4) 12216 .n(8) 12217 .k(8) 12218 .cn_stride(11) 12219 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12220 } 12221 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,k_eq_8_subtile)12222 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile) { 12223 TEST_REQUIRES_ARM_NEON; 12224 for (uint32_t n = 1; n <= 8; n++) { 12225 for (uint32_t m = 1; m <= 4; m++) { 12226 GemmMicrokernelTester() 12227 .mr(4) 12228 .nr(8) 12229 .kr(2) 12230 .sr(1) 12231 .m(m) 12232 .n(n) 12233 .k(8) 12234 .iterations(1) 12235 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12236 } 12237 } 12238 } 12239 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,k_eq_8_subtile_m)12240 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) { 12241 TEST_REQUIRES_ARM_NEON; 12242 for (uint32_t m = 1; m <= 4; m++) { 12243 GemmMicrokernelTester() 12244 .mr(4) 12245 .nr(8) 12246 .kr(2) 12247 .sr(1) 12248 .m(m) 12249 .n(8) 12250 .k(8) 12251 .iterations(1) 12252 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12253 } 12254 } 12255 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,k_eq_8_subtile_n)12256 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) { 12257 TEST_REQUIRES_ARM_NEON; 12258 for (uint32_t n = 1; n <= 8; n++) { 12259 GemmMicrokernelTester() 12260 .mr(4) 12261 .nr(8) 
12262 .kr(2) 12263 .sr(1) 12264 .m(4) 12265 .n(n) 12266 .k(8) 12267 .iterations(1) 12268 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12269 } 12270 } 12271 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,k_lt_8)12272 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_lt_8) { 12273 TEST_REQUIRES_ARM_NEON; 12274 for (size_t k = 1; k < 8; k++) { 12275 GemmMicrokernelTester() 12276 .mr(4) 12277 .nr(8) 12278 .kr(2) 12279 .sr(1) 12280 .m(4) 12281 .n(8) 12282 .k(k) 12283 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12284 } 12285 } 12286 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,k_lt_8_subtile)12287 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_lt_8_subtile) { 12288 TEST_REQUIRES_ARM_NEON; 12289 for (size_t k = 1; k < 8; k++) { 12290 for (uint32_t n = 1; n <= 8; n++) { 12291 for (uint32_t m = 1; m <= 4; m++) { 12292 GemmMicrokernelTester() 12293 .mr(4) 12294 .nr(8) 12295 .kr(2) 12296 .sr(1) 12297 .m(m) 12298 .n(n) 12299 .k(k) 12300 .iterations(1) 12301 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12302 } 12303 } 12304 } 12305 } 12306 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,k_gt_8)12307 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_gt_8) { 12308 TEST_REQUIRES_ARM_NEON; 12309 for (size_t k = 9; k < 16; k++) { 12310 GemmMicrokernelTester() 12311 .mr(4) 12312 .nr(8) 12313 .kr(2) 12314 .sr(1) 12315 .m(4) 12316 .n(8) 12317 .k(k) 12318 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12319 } 12320 } 12321 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,k_gt_8_subtile)12322 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_gt_8_subtile) { 12323 TEST_REQUIRES_ARM_NEON; 12324 for (size_t k = 9; k < 16; 
k++) { 12325 for (uint32_t n = 1; n <= 8; n++) { 12326 for (uint32_t m = 1; m <= 4; m++) { 12327 GemmMicrokernelTester() 12328 .mr(4) 12329 .nr(8) 12330 .kr(2) 12331 .sr(1) 12332 .m(m) 12333 .n(n) 12334 .k(k) 12335 .iterations(1) 12336 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12337 } 12338 } 12339 } 12340 } 12341 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,k_div_8)12342 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_div_8) { 12343 TEST_REQUIRES_ARM_NEON; 12344 for (size_t k = 16; k <= 80; k += 8) { 12345 GemmMicrokernelTester() 12346 .mr(4) 12347 .nr(8) 12348 .kr(2) 12349 .sr(1) 12350 .m(4) 12351 .n(8) 12352 .k(k) 12353 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12354 } 12355 } 12356 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,k_div_8_subtile)12357 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_div_8_subtile) { 12358 TEST_REQUIRES_ARM_NEON; 12359 for (size_t k = 16; k <= 80; k += 8) { 12360 for (uint32_t n = 1; n <= 8; n++) { 12361 for (uint32_t m = 1; m <= 4; m++) { 12362 GemmMicrokernelTester() 12363 .mr(4) 12364 .nr(8) 12365 .kr(2) 12366 .sr(1) 12367 .m(m) 12368 .n(n) 12369 .k(k) 12370 .iterations(1) 12371 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12372 } 12373 } 12374 } 12375 } 12376 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,n_gt_8)12377 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8) { 12378 TEST_REQUIRES_ARM_NEON; 12379 for (uint32_t n = 9; n < 16; n++) { 12380 for (size_t k = 1; k <= 40; k += 9) { 12381 GemmMicrokernelTester() 12382 .mr(4) 12383 .nr(8) 12384 .kr(2) 12385 .sr(1) 12386 .m(4) 12387 .n(n) 12388 .k(k) 12389 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
12390 } 12391 } 12392 } 12393 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,n_gt_8_strided_cn)12394 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) { 12395 TEST_REQUIRES_ARM_NEON; 12396 for (uint32_t n = 9; n < 16; n++) { 12397 for (size_t k = 1; k <= 40; k += 9) { 12398 GemmMicrokernelTester() 12399 .mr(4) 12400 .nr(8) 12401 .kr(2) 12402 .sr(1) 12403 .m(4) 12404 .n(n) 12405 .k(k) 12406 .cn_stride(11) 12407 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12408 } 12409 } 12410 } 12411 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,n_gt_8_subtile)12412 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_subtile) { 12413 TEST_REQUIRES_ARM_NEON; 12414 for (uint32_t n = 9; n < 16; n++) { 12415 for (size_t k = 1; k <= 40; k += 9) { 12416 for (uint32_t m = 1; m <= 4; m++) { 12417 GemmMicrokernelTester() 12418 .mr(4) 12419 .nr(8) 12420 .kr(2) 12421 .sr(1) 12422 .m(m) 12423 .n(n) 12424 .k(k) 12425 .iterations(1) 12426 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12427 } 12428 } 12429 } 12430 } 12431 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,n_div_8)12432 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8) { 12433 TEST_REQUIRES_ARM_NEON; 12434 for (uint32_t n = 16; n <= 24; n += 8) { 12435 for (size_t k = 1; k <= 40; k += 9) { 12436 GemmMicrokernelTester() 12437 .mr(4) 12438 .nr(8) 12439 .kr(2) 12440 .sr(1) 12441 .m(4) 12442 .n(n) 12443 .k(k) 12444 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12445 } 12446 } 12447 } 12448 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,n_div_8_strided_cn)12449 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_strided_cn) { 12450 TEST_REQUIRES_ARM_NEON; 12451 for (uint32_t n = 16; n <= 24; n += 8) { 12452 for (size_t k = 1; k <= 
40; k += 9) { 12453 GemmMicrokernelTester() 12454 .mr(4) 12455 .nr(8) 12456 .kr(2) 12457 .sr(1) 12458 .m(4) 12459 .n(n) 12460 .k(k) 12461 .cn_stride(11) 12462 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12463 } 12464 } 12465 } 12466 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,n_div_8_subtile)12467 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_subtile) { 12468 TEST_REQUIRES_ARM_NEON; 12469 for (uint32_t n = 16; n <= 24; n += 8) { 12470 for (size_t k = 1; k <= 40; k += 9) { 12471 for (uint32_t m = 1; m <= 4; m++) { 12472 GemmMicrokernelTester() 12473 .mr(4) 12474 .nr(8) 12475 .kr(2) 12476 .sr(1) 12477 .m(m) 12478 .n(n) 12479 .k(k) 12480 .iterations(1) 12481 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12482 } 12483 } 12484 } 12485 } 12486 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,small_kernel)12487 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, small_kernel) { 12488 TEST_REQUIRES_ARM_NEON; 12489 for (size_t k = 1; k <= 40; k += 9) { 12490 GemmMicrokernelTester() 12491 .mr(4) 12492 .nr(8) 12493 .kr(2) 12494 .sr(1) 12495 .m(4) 12496 .n(8) 12497 .k(k) 12498 .ks(3) 12499 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12500 } 12501 } 12502 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,small_kernel_subtile)12503 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, small_kernel_subtile) { 12504 TEST_REQUIRES_ARM_NEON; 12505 for (size_t k = 1; k <= 40; k += 9) { 12506 for (uint32_t n = 1; n <= 8; n++) { 12507 for (uint32_t m = 1; m <= 4; m++) { 12508 GemmMicrokernelTester() 12509 .mr(4) 12510 .nr(8) 12511 .kr(2) 12512 .sr(1) 12513 .m(m) 12514 .n(n) 12515 .k(k) 12516 .ks(3) 12517 .iterations(1) 12518 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12519 } 12520 } 12521 } 12522 } 12523 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,n_gt_8_small_kernel)12524 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) { 12525 TEST_REQUIRES_ARM_NEON; 12526 for (uint32_t n = 9; n < 16; n++) { 12527 for (size_t k = 1; k <= 40; k += 9) { 12528 GemmMicrokernelTester() 12529 .mr(4) 12530 .nr(8) 12531 .kr(2) 12532 .sr(1) 12533 .m(4) 12534 .n(n) 12535 .k(k) 12536 .ks(3) 12537 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12538 } 12539 } 12540 } 12541 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,n_div_8_small_kernel)12542 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_small_kernel) { 12543 TEST_REQUIRES_ARM_NEON; 12544 for (uint32_t n = 16; n <= 24; n += 8) { 12545 for (size_t k = 1; k <= 40; k += 9) { 12546 GemmMicrokernelTester() 12547 .mr(4) 12548 .nr(8) 12549 .kr(2) 12550 .sr(1) 12551 .m(4) 12552 .n(n) 12553 .k(k) 12554 .ks(3) 12555 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12556 } 12557 } 12558 } 12559 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,strided_cm_subtile)12560 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cm_subtile) { 12561 TEST_REQUIRES_ARM_NEON; 12562 for (size_t k = 1; k <= 40; k += 9) { 12563 for (uint32_t n = 1; n <= 8; n++) { 12564 for (uint32_t m = 1; m <= 4; m++) { 12565 GemmMicrokernelTester() 12566 .mr(4) 12567 .nr(8) 12568 .kr(2) 12569 .sr(1) 12570 .m(m) 12571 .n(n) 12572 .k(k) 12573 .cm_stride(11) 12574 .iterations(1) 12575 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12576 } 12577 } 12578 } 12579 } 12580 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP,a_offset)12581 
// a_offset: full 4x8 tile with ks(3) and a_offset(163); k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// zero: same configuration as a_offset, additionally sweeping zero_index over 0..3
// for every k in 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// qmin: full 4x8 tile, k = 8, with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// qmax: full 4x8 tile, k = 8, with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cm) { 12647 TEST_REQUIRES_ARM_NEON; 12648 GemmMicrokernelTester() 12649 .mr(4) 12650 .nr(8) 12651 .kr(2) 12652 .sr(1) 12653 .m(4) 12654 .n(8) 12655 .k(8) 12656 .cm_stride(11) 12657 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12658 } 12659 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 12660 12661 12662 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP,k_eq_8)12663 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, k_eq_8) { 12664 TEST_REQUIRES_ARM_NEON; 12665 GemmMicrokernelTester() 12666 .mr(2) 12667 .nr(16) 12668 .kr(2) 12669 .sr(1) 12670 .m(2) 12671 .n(16) 12672 .k(8) 12673 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12674 } 12675 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP,strided_cn)12676 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, strided_cn) { 12677 TEST_REQUIRES_ARM_NEON; 12678 GemmMicrokernelTester() 12679 .mr(2) 12680 .nr(16) 12681 .kr(2) 12682 .sr(1) 12683 .m(2) 12684 .n(16) 12685 .k(8) 12686 .cn_stride(19) 12687 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12688 } 12689 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP,k_eq_8_subtile)12690 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, k_eq_8_subtile) { 12691 TEST_REQUIRES_ARM_NEON; 12692 for (uint32_t n = 1; n <= 16; n++) { 12693 for (uint32_t m = 1; m <= 2; m++) { 12694 GemmMicrokernelTester() 12695 .mr(2) 12696 .nr(16) 12697 .kr(2) 12698 .sr(1) 12699 .m(m) 12700 .n(n) 12701 .k(8) 12702 .iterations(1) 12703 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12704 } 12705 } 12706 } 12707 
// k_eq_8_subtile_m: k = 8, n = 16, m swept over 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_eq_8_subtile_n: k = 8, m = 2, n swept over 1..16; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_lt_8: full 2x16 tile, k swept over 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_lt_8_subtile: k in 1..7, n in 1..16, m in 1..2 (nested in that order); one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k_gt_8: full 2x16 tile, k swept over 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_gt_8_subtile: k in 9..15, n in 1..16, m in 1..2 (nested in that order); one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k_div_8: full 2x16 tile, k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_div_8_subtile: k = 16, 24, ..., 80; n in 1..16; m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_gt_16: m = 2, n in 17..31 (outer loop), k in 1, 10, 19, 28, 37 (inner loop).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_16_strided_cn: same sweep as n_gt_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_16_subtile: n in 17..31, k in 1, 10, 19, 28, 37, m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_div_16: m = 2, n in {32, 48}, k in 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_16_strided_cn: same sweep as n_div_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_16_subtile: n in {32, 48}, k in 1, 10, 19, 28, 37, m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// small_kernel: full 2x16 tile with ks(3); k in 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// small_kernel_subtile: ks(3); k in 1, 10, 19, 28, 37; n in 1..16; m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_gt_16_small_kernel: ks(3); m = 2; n in 17..31; k in 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_16_small_kernel: ks(3); m = 2; n in {32, 48}; k in 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// strided_cm_subtile: cm_stride(19); k in 1, 10, 19, 28, 37; n in 1..16; m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// a_offset: full 2x16 tile with ks(3) and a_offset(83); k in 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// zero: same configuration as a_offset, additionally sweeping zero_index over 0..1
// for every k in 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(16).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, qmin) { 13087 TEST_REQUIRES_ARM_NEON; 13088 GemmMicrokernelTester() 13089 .mr(2) 13090 .nr(16) 13091 .kr(2) 13092 .sr(1) 13093 .m(2) 13094 .n(16) 13095 .k(8) 13096 .qmin(128) 13097 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13098 } 13099 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP,qmax)13100 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, qmax) { 13101 TEST_REQUIRES_ARM_NEON; 13102 GemmMicrokernelTester() 13103 .mr(2) 13104 .nr(16) 13105 .kr(2) 13106 .sr(1) 13107 .m(2) 13108 .n(16) 13109 .k(8) 13110 .qmax(128) 13111 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13112 } 13113 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP,strided_cm)13114 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_DUP, strided_cm) { 13115 TEST_REQUIRES_ARM_NEON; 13116 GemmMicrokernelTester() 13117 .mr(2) 13118 .nr(16) 13119 .kr(2) 13120 .sr(1) 13121 .m(2) 13122 .n(16) 13123 .k(8) 13124 .cm_stride(19) 13125 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13126 } 13127 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 13128 13129 13130 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,k_eq_16)13131 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16) { 13132 TEST_REQUIRES_ARM_NEON; 13133 GemmMicrokernelTester() 13134 .mr(1) 13135 .nr(8) 13136 .kr(2) 13137 .sr(1) 13138 .m(1) 13139 .n(8) 13140 .k(16) 13141 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13142 } 13143 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,strided_cn)13144 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, strided_cn) { 13145 TEST_REQUIRES_ARM_NEON; 13146 GemmMicrokernelTester() 13147 
.mr(1) 13148 .nr(8) 13149 .kr(2) 13150 .sr(1) 13151 .m(1) 13152 .n(8) 13153 .k(16) 13154 .cn_stride(11) 13155 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13156 } 13157 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,k_eq_16_subtile)13158 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile) { 13159 TEST_REQUIRES_ARM_NEON; 13160 for (uint32_t n = 1; n <= 8; n++) { 13161 for (uint32_t m = 1; m <= 1; m++) { 13162 GemmMicrokernelTester() 13163 .mr(1) 13164 .nr(8) 13165 .kr(2) 13166 .sr(1) 13167 .m(m) 13168 .n(n) 13169 .k(16) 13170 .iterations(1) 13171 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13172 } 13173 } 13174 } 13175 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,k_eq_16_subtile_m)13176 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) { 13177 TEST_REQUIRES_ARM_NEON; 13178 for (uint32_t m = 1; m <= 1; m++) { 13179 GemmMicrokernelTester() 13180 .mr(1) 13181 .nr(8) 13182 .kr(2) 13183 .sr(1) 13184 .m(m) 13185 .n(8) 13186 .k(16) 13187 .iterations(1) 13188 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13189 } 13190 } 13191 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,k_eq_16_subtile_n)13192 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) { 13193 TEST_REQUIRES_ARM_NEON; 13194 for (uint32_t n = 1; n <= 8; n++) { 13195 GemmMicrokernelTester() 13196 .mr(1) 13197 .nr(8) 13198 .kr(2) 13199 .sr(1) 13200 .m(1) 13201 .n(n) 13202 .k(16) 13203 .iterations(1) 13204 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13205 } 13206 } 13207 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,k_lt_16)13208 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_lt_16) { 13209 
TEST_REQUIRES_ARM_NEON; 13210 for (size_t k = 1; k < 16; k++) { 13211 GemmMicrokernelTester() 13212 .mr(1) 13213 .nr(8) 13214 .kr(2) 13215 .sr(1) 13216 .m(1) 13217 .n(8) 13218 .k(k) 13219 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13220 } 13221 } 13222 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,k_lt_16_subtile)13223 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_lt_16_subtile) { 13224 TEST_REQUIRES_ARM_NEON; 13225 for (size_t k = 1; k < 16; k++) { 13226 for (uint32_t n = 1; n <= 8; n++) { 13227 for (uint32_t m = 1; m <= 1; m++) { 13228 GemmMicrokernelTester() 13229 .mr(1) 13230 .nr(8) 13231 .kr(2) 13232 .sr(1) 13233 .m(m) 13234 .n(n) 13235 .k(k) 13236 .iterations(1) 13237 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13238 } 13239 } 13240 } 13241 } 13242 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,k_gt_16)13243 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_gt_16) { 13244 TEST_REQUIRES_ARM_NEON; 13245 for (size_t k = 17; k < 32; k++) { 13246 GemmMicrokernelTester() 13247 .mr(1) 13248 .nr(8) 13249 .kr(2) 13250 .sr(1) 13251 .m(1) 13252 .n(8) 13253 .k(k) 13254 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13255 } 13256 } 13257 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,k_gt_16_subtile)13258 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_gt_16_subtile) { 13259 TEST_REQUIRES_ARM_NEON; 13260 for (size_t k = 17; k < 32; k++) { 13261 for (uint32_t n = 1; n <= 8; n++) { 13262 for (uint32_t m = 1; m <= 1; m++) { 13263 GemmMicrokernelTester() 13264 .mr(1) 13265 .nr(8) 13266 .kr(2) 13267 .sr(1) 13268 .m(m) 13269 .n(n) 13270 .k(k) 13271 .iterations(1) 13272 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 13273 } 13274 } 13275 } 13276 } 13277 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,k_div_16)13278 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_div_16) { 13279 TEST_REQUIRES_ARM_NEON; 13280 for (size_t k = 32; k <= 160; k += 16) { 13281 GemmMicrokernelTester() 13282 .mr(1) 13283 .nr(8) 13284 .kr(2) 13285 .sr(1) 13286 .m(1) 13287 .n(8) 13288 .k(k) 13289 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13290 } 13291 } 13292 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,k_div_16_subtile)13293 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, k_div_16_subtile) { 13294 TEST_REQUIRES_ARM_NEON; 13295 for (size_t k = 32; k <= 160; k += 16) { 13296 for (uint32_t n = 1; n <= 8; n++) { 13297 for (uint32_t m = 1; m <= 1; m++) { 13298 GemmMicrokernelTester() 13299 .mr(1) 13300 .nr(8) 13301 .kr(2) 13302 .sr(1) 13303 .m(m) 13304 .n(n) 13305 .k(k) 13306 .iterations(1) 13307 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13308 } 13309 } 13310 } 13311 } 13312 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,n_gt_8)13313 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8) { 13314 TEST_REQUIRES_ARM_NEON; 13315 for (uint32_t n = 9; n < 16; n++) { 13316 for (size_t k = 1; k <= 80; k += 17) { 13317 GemmMicrokernelTester() 13318 .mr(1) 13319 .nr(8) 13320 .kr(2) 13321 .sr(1) 13322 .m(1) 13323 .n(n) 13324 .k(k) 13325 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13326 } 13327 } 13328 } 13329 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,n_gt_8_strided_cn)13330 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) { 13331 TEST_REQUIRES_ARM_NEON; 13332 for (uint32_t n = 9; n < 16; n++) { 13333 for (size_t k = 1; k <= 80; k += 17) { 13334 GemmMicrokernelTester() 13335 .mr(1) 
13336 .nr(8) 13337 .kr(2) 13338 .sr(1) 13339 .m(1) 13340 .n(n) 13341 .k(k) 13342 .cn_stride(11) 13343 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13344 } 13345 } 13346 } 13347 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,n_gt_8_subtile)13348 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8_subtile) { 13349 TEST_REQUIRES_ARM_NEON; 13350 for (uint32_t n = 9; n < 16; n++) { 13351 for (size_t k = 1; k <= 80; k += 17) { 13352 for (uint32_t m = 1; m <= 1; m++) { 13353 GemmMicrokernelTester() 13354 .mr(1) 13355 .nr(8) 13356 .kr(2) 13357 .sr(1) 13358 .m(m) 13359 .n(n) 13360 .k(k) 13361 .iterations(1) 13362 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13363 } 13364 } 13365 } 13366 } 13367 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,n_div_8)13368 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8) { 13369 TEST_REQUIRES_ARM_NEON; 13370 for (uint32_t n = 16; n <= 24; n += 8) { 13371 for (size_t k = 1; k <= 80; k += 17) { 13372 GemmMicrokernelTester() 13373 .mr(1) 13374 .nr(8) 13375 .kr(2) 13376 .sr(1) 13377 .m(1) 13378 .n(n) 13379 .k(k) 13380 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13381 } 13382 } 13383 } 13384 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP,n_div_8_strided_cn)13385 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) { 13386 TEST_REQUIRES_ARM_NEON; 13387 for (uint32_t n = 16; n <= 24; n += 8) { 13388 for (size_t k = 1; k <= 80; k += 17) { 13389 GemmMicrokernelTester() 13390 .mr(1) 13391 .nr(8) 13392 .kr(2) 13393 .sr(1) 13394 .m(1) 13395 .n(n) 13396 .k(k) 13397 .cn_stride(11) 13398 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13399 } 13400 } 13401 } 13402 
// n from 16 to 24 in steps of 8, k from 1 to 80 in steps of 17, m over [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1, n = 8, ks = 3; k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks = 3; k from 1 to 80 in steps of 17, n over [1, 8], m over [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1, ks = 3; n swept over [9, 16), k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 1, ks = 3; n from 16 to 24 in steps of 8, k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride = 11; k from 1 to 80 in steps of 17, n over [1, 8], m over [1, 1]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1, n = 8, ks = 3, a_offset = 83; k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same configuration as a_offset, additionally sweeping zero_index over [0, 1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 1, n = 8, k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// m = 1, n = 8, k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// m = 1, n = 8, k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
13595 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 13596 13597 13598 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,k_eq_16)13599 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16) { 13600 TEST_REQUIRES_ARM_NEON; 13601 GemmMicrokernelTester() 13602 .mr(3) 13603 .nr(8) 13604 .kr(2) 13605 .sr(1) 13606 .m(3) 13607 .n(8) 13608 .k(16) 13609 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13610 } 13611 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,strided_cn)13612 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cn) { 13613 TEST_REQUIRES_ARM_NEON; 13614 GemmMicrokernelTester() 13615 .mr(3) 13616 .nr(8) 13617 .kr(2) 13618 .sr(1) 13619 .m(3) 13620 .n(8) 13621 .k(16) 13622 .cn_stride(11) 13623 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13624 } 13625 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,k_eq_16_subtile)13626 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile) { 13627 TEST_REQUIRES_ARM_NEON; 13628 for (uint32_t n = 1; n <= 8; n++) { 13629 for (uint32_t m = 1; m <= 3; m++) { 13630 GemmMicrokernelTester() 13631 .mr(3) 13632 .nr(8) 13633 .kr(2) 13634 .sr(1) 13635 .m(m) 13636 .n(n) 13637 .k(16) 13638 .iterations(1) 13639 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13640 } 13641 } 13642 } 13643 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,k_eq_16_subtile_m)13644 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) { 13645 TEST_REQUIRES_ARM_NEON; 13646 for (uint32_t m = 1; m <= 3; m++) { 13647 GemmMicrokernelTester() 13648 .mr(3) 13649 .nr(8) 13650 .kr(2) 13651 .sr(1) 13652 .m(m) 13653 .n(8) 13654 .k(16) 13655 .iterations(1) 13656 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13657 } 13658 } 13659 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,k_eq_16_subtile_n)13660 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) { 13661 TEST_REQUIRES_ARM_NEON; 13662 for (uint32_t n = 1; n <= 8; n++) { 13663 GemmMicrokernelTester() 13664 .mr(3) 13665 .nr(8) 13666 .kr(2) 13667 .sr(1) 13668 .m(3) 13669 .n(n) 13670 .k(16) 13671 .iterations(1) 13672 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13673 } 13674 } 13675 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,k_lt_16)13676 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_lt_16) { 13677 TEST_REQUIRES_ARM_NEON; 13678 for (size_t k = 1; k < 16; k++) { 13679 GemmMicrokernelTester() 13680 .mr(3) 13681 .nr(8) 13682 .kr(2) 13683 .sr(1) 13684 .m(3) 13685 .n(8) 13686 .k(k) 13687 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13688 } 13689 } 13690 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,k_lt_16_subtile)13691 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_lt_16_subtile) { 13692 TEST_REQUIRES_ARM_NEON; 13693 for (size_t k = 1; k < 16; k++) { 13694 for (uint32_t n = 1; n <= 8; n++) { 13695 for (uint32_t m = 1; m <= 3; m++) { 13696 GemmMicrokernelTester() 13697 .mr(3) 13698 .nr(8) 13699 .kr(2) 13700 .sr(1) 13701 .m(m) 13702 .n(n) 13703 .k(k) 13704 .iterations(1) 13705 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13706 } 13707 } 13708 } 13709 } 13710 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,k_gt_16)13711 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_gt_16) { 13712 TEST_REQUIRES_ARM_NEON; 13713 for (size_t k = 17; k < 32; k++) { 13714 GemmMicrokernelTester() 13715 .mr(3) 13716 .nr(8) 13717 .kr(2) 13718 .sr(1) 13719 .m(3) 13720 .n(8) 
13721 .k(k) 13722 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13723 } 13724 } 13725 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,k_gt_16_subtile)13726 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_gt_16_subtile) { 13727 TEST_REQUIRES_ARM_NEON; 13728 for (size_t k = 17; k < 32; k++) { 13729 for (uint32_t n = 1; n <= 8; n++) { 13730 for (uint32_t m = 1; m <= 3; m++) { 13731 GemmMicrokernelTester() 13732 .mr(3) 13733 .nr(8) 13734 .kr(2) 13735 .sr(1) 13736 .m(m) 13737 .n(n) 13738 .k(k) 13739 .iterations(1) 13740 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13741 } 13742 } 13743 } 13744 } 13745 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,k_div_16)13746 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_div_16) { 13747 TEST_REQUIRES_ARM_NEON; 13748 for (size_t k = 32; k <= 160; k += 16) { 13749 GemmMicrokernelTester() 13750 .mr(3) 13751 .nr(8) 13752 .kr(2) 13753 .sr(1) 13754 .m(3) 13755 .n(8) 13756 .k(k) 13757 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13758 } 13759 } 13760 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,k_div_16_subtile)13761 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_div_16_subtile) { 13762 TEST_REQUIRES_ARM_NEON; 13763 for (size_t k = 32; k <= 160; k += 16) { 13764 for (uint32_t n = 1; n <= 8; n++) { 13765 for (uint32_t m = 1; m <= 3; m++) { 13766 GemmMicrokernelTester() 13767 .mr(3) 13768 .nr(8) 13769 .kr(2) 13770 .sr(1) 13771 .m(m) 13772 .n(n) 13773 .k(k) 13774 .iterations(1) 13775 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13776 } 13777 } 13778 } 13779 } 13780 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,n_gt_8)13781 
// m = 3; n swept over [9, 16), k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n swept over [9, 16), k from 1 to 80 in steps of 17, m over [1, 3]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 3; n from 16 to 24 in steps of 8, k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(2).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n from 16 to 24 in steps of 8, k from 1 to 80 in steps of 17, m over [1, 3]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 3, n = 8, ks = 3; k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(2).sr(1)
        .m(3).n(8).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, small_kernel_subtile) { 13908 TEST_REQUIRES_ARM_NEON; 13909 for (size_t k = 1; k <= 80; k += 17) { 13910 for (uint32_t n = 1; n <= 8; n++) { 13911 for (uint32_t m = 1; m <= 3; m++) { 13912 GemmMicrokernelTester() 13913 .mr(3) 13914 .nr(8) 13915 .kr(2) 13916 .sr(1) 13917 .m(m) 13918 .n(n) 13919 .k(k) 13920 .ks(3) 13921 .iterations(1) 13922 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13923 } 13924 } 13925 } 13926 } 13927 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,n_gt_8_small_kernel)13928 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) { 13929 TEST_REQUIRES_ARM_NEON; 13930 for (uint32_t n = 9; n < 16; n++) { 13931 for (size_t k = 1; k <= 80; k += 17) { 13932 GemmMicrokernelTester() 13933 .mr(3) 13934 .nr(8) 13935 .kr(2) 13936 .sr(1) 13937 .m(3) 13938 .n(n) 13939 .k(k) 13940 .ks(3) 13941 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13942 } 13943 } 13944 } 13945 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,n_div_8_small_kernel)13946 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) { 13947 TEST_REQUIRES_ARM_NEON; 13948 for (uint32_t n = 16; n <= 24; n += 8) { 13949 for (size_t k = 1; k <= 80; k += 17) { 13950 GemmMicrokernelTester() 13951 .mr(3) 13952 .nr(8) 13953 .kr(2) 13954 .sr(1) 13955 .m(3) 13956 .n(n) 13957 .k(k) 13958 .ks(3) 13959 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13960 } 13961 } 13962 } 13963 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,strided_cm_subtile)13964 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cm_subtile) { 13965 TEST_REQUIRES_ARM_NEON; 13966 for (size_t k = 1; k <= 80; k += 17) { 13967 for (uint32_t n = 1; n <= 8; n++) { 13968 for (uint32_t m = 1; m <= 3; 
m++) { 13969 GemmMicrokernelTester() 13970 .mr(3) 13971 .nr(8) 13972 .kr(2) 13973 .sr(1) 13974 .m(m) 13975 .n(n) 13976 .k(k) 13977 .cm_stride(11) 13978 .iterations(1) 13979 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13980 } 13981 } 13982 } 13983 } 13984 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,a_offset)13985 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, a_offset) { 13986 TEST_REQUIRES_ARM_NEON; 13987 for (size_t k = 1; k <= 80; k += 17) { 13988 GemmMicrokernelTester() 13989 .mr(3) 13990 .nr(8) 13991 .kr(2) 13992 .sr(1) 13993 .m(3) 13994 .n(8) 13995 .k(k) 13996 .ks(3) 13997 .a_offset(251) 13998 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13999 } 14000 } 14001 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,zero)14002 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, zero) { 14003 TEST_REQUIRES_ARM_NEON; 14004 for (size_t k = 1; k <= 80; k += 17) { 14005 for (uint32_t mz = 0; mz < 3; mz++) { 14006 GemmMicrokernelTester() 14007 .mr(3) 14008 .nr(8) 14009 .kr(2) 14010 .sr(1) 14011 .m(3) 14012 .n(8) 14013 .k(k) 14014 .ks(3) 14015 .a_offset(251) 14016 .zero_index(mz) 14017 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14018 } 14019 } 14020 } 14021 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,qmin)14022 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, qmin) { 14023 TEST_REQUIRES_ARM_NEON; 14024 GemmMicrokernelTester() 14025 .mr(3) 14026 .nr(8) 14027 .kr(2) 14028 .sr(1) 14029 .m(3) 14030 .n(8) 14031 .k(16) 14032 .qmin(128) 14033 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14034 } 14035 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,qmax)14036 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, qmax) { 
14037 TEST_REQUIRES_ARM_NEON; 14038 GemmMicrokernelTester() 14039 .mr(3) 14040 .nr(8) 14041 .kr(2) 14042 .sr(1) 14043 .m(3) 14044 .n(8) 14045 .k(16) 14046 .qmax(128) 14047 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14048 } 14049 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP,strided_cm)14050 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cm) { 14051 TEST_REQUIRES_ARM_NEON; 14052 GemmMicrokernelTester() 14053 .mr(3) 14054 .nr(8) 14055 .kr(2) 14056 .sr(1) 14057 .m(3) 14058 .n(8) 14059 .k(16) 14060 .cm_stride(11) 14061 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14062 } 14063 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 14064 14065 14066 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,k_eq_16)14067 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_eq_16) { 14068 TEST_REQUIRES_ARM_NEON; 14069 GemmMicrokernelTester() 14070 .mr(1) 14071 .nr(16) 14072 .kr(2) 14073 .sr(1) 14074 .m(1) 14075 .n(16) 14076 .k(16) 14077 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14078 } 14079 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,strided_cn)14080 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, strided_cn) { 14081 TEST_REQUIRES_ARM_NEON; 14082 GemmMicrokernelTester() 14083 .mr(1) 14084 .nr(16) 14085 .kr(2) 14086 .sr(1) 14087 .m(1) 14088 .n(16) 14089 .k(16) 14090 .cn_stride(19) 14091 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14092 } 14093 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,k_eq_16_subtile)14094 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_eq_16_subtile) { 14095 TEST_REQUIRES_ARM_NEON; 14096 for (uint32_t n = 1; n <= 16; n++) { 14097 for (uint32_t m = 
1; m <= 1; m++) { 14098 GemmMicrokernelTester() 14099 .mr(1) 14100 .nr(16) 14101 .kr(2) 14102 .sr(1) 14103 .m(m) 14104 .n(n) 14105 .k(16) 14106 .iterations(1) 14107 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14108 } 14109 } 14110 } 14111 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,k_eq_16_subtile_m)14112 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) { 14113 TEST_REQUIRES_ARM_NEON; 14114 for (uint32_t m = 1; m <= 1; m++) { 14115 GemmMicrokernelTester() 14116 .mr(1) 14117 .nr(16) 14118 .kr(2) 14119 .sr(1) 14120 .m(m) 14121 .n(16) 14122 .k(16) 14123 .iterations(1) 14124 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14125 } 14126 } 14127 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,k_eq_16_subtile_n)14128 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) { 14129 TEST_REQUIRES_ARM_NEON; 14130 for (uint32_t n = 1; n <= 16; n++) { 14131 GemmMicrokernelTester() 14132 .mr(1) 14133 .nr(16) 14134 .kr(2) 14135 .sr(1) 14136 .m(1) 14137 .n(n) 14138 .k(16) 14139 .iterations(1) 14140 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14141 } 14142 } 14143 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,k_lt_16)14144 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_lt_16) { 14145 TEST_REQUIRES_ARM_NEON; 14146 for (size_t k = 1; k < 16; k++) { 14147 GemmMicrokernelTester() 14148 .mr(1) 14149 .nr(16) 14150 .kr(2) 14151 .sr(1) 14152 .m(1) 14153 .n(16) 14154 .k(k) 14155 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14156 } 14157 } 14158 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,k_lt_16_subtile)14159 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_lt_16_subtile) { 
14160 TEST_REQUIRES_ARM_NEON; 14161 for (size_t k = 1; k < 16; k++) { 14162 for (uint32_t n = 1; n <= 16; n++) { 14163 for (uint32_t m = 1; m <= 1; m++) { 14164 GemmMicrokernelTester() 14165 .mr(1) 14166 .nr(16) 14167 .kr(2) 14168 .sr(1) 14169 .m(m) 14170 .n(n) 14171 .k(k) 14172 .iterations(1) 14173 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14174 } 14175 } 14176 } 14177 } 14178 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,k_gt_16)14179 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_gt_16) { 14180 TEST_REQUIRES_ARM_NEON; 14181 for (size_t k = 17; k < 32; k++) { 14182 GemmMicrokernelTester() 14183 .mr(1) 14184 .nr(16) 14185 .kr(2) 14186 .sr(1) 14187 .m(1) 14188 .n(16) 14189 .k(k) 14190 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14191 } 14192 } 14193 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,k_gt_16_subtile)14194 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_gt_16_subtile) { 14195 TEST_REQUIRES_ARM_NEON; 14196 for (size_t k = 17; k < 32; k++) { 14197 for (uint32_t n = 1; n <= 16; n++) { 14198 for (uint32_t m = 1; m <= 1; m++) { 14199 GemmMicrokernelTester() 14200 .mr(1) 14201 .nr(16) 14202 .kr(2) 14203 .sr(1) 14204 .m(m) 14205 .n(n) 14206 .k(k) 14207 .iterations(1) 14208 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14209 } 14210 } 14211 } 14212 } 14213 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,k_div_16)14214 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_div_16) { 14215 TEST_REQUIRES_ARM_NEON; 14216 for (size_t k = 32; k <= 160; k += 16) { 14217 GemmMicrokernelTester() 14218 .mr(1) 14219 .nr(16) 14220 .kr(2) 14221 .sr(1) 14222 .m(1) 14223 .n(16) 14224 .k(k) 14225 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14226 } 14227 } 14228 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,k_div_16_subtile)14229 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_div_16_subtile) { 14230 TEST_REQUIRES_ARM_NEON; 14231 for (size_t k = 32; k <= 160; k += 16) { 14232 for (uint32_t n = 1; n <= 16; n++) { 14233 for (uint32_t m = 1; m <= 1; m++) { 14234 GemmMicrokernelTester() 14235 .mr(1) 14236 .nr(16) 14237 .kr(2) 14238 .sr(1) 14239 .m(m) 14240 .n(n) 14241 .k(k) 14242 .iterations(1) 14243 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14244 } 14245 } 14246 } 14247 } 14248 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,n_gt_16)14249 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_gt_16) { 14250 TEST_REQUIRES_ARM_NEON; 14251 for (uint32_t n = 17; n < 32; n++) { 14252 for (size_t k = 1; k <= 80; k += 17) { 14253 GemmMicrokernelTester() 14254 .mr(1) 14255 .nr(16) 14256 .kr(2) 14257 .sr(1) 14258 .m(1) 14259 .n(n) 14260 .k(k) 14261 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14262 } 14263 } 14264 } 14265 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,n_gt_16_strided_cn)14266 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) { 14267 TEST_REQUIRES_ARM_NEON; 14268 for (uint32_t n = 17; n < 32; n++) { 14269 for (size_t k = 1; k <= 80; k += 17) { 14270 GemmMicrokernelTester() 14271 .mr(1) 14272 .nr(16) 14273 .kr(2) 14274 .sr(1) 14275 .m(1) 14276 .n(n) 14277 .k(k) 14278 .cn_stride(19) 14279 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14280 } 14281 } 14282 } 14283 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,n_gt_16_subtile)14284 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_gt_16_subtile) { 14285 
TEST_REQUIRES_ARM_NEON; 14286 for (uint32_t n = 17; n < 32; n++) { 14287 for (size_t k = 1; k <= 80; k += 17) { 14288 for (uint32_t m = 1; m <= 1; m++) { 14289 GemmMicrokernelTester() 14290 .mr(1) 14291 .nr(16) 14292 .kr(2) 14293 .sr(1) 14294 .m(m) 14295 .n(n) 14296 .k(k) 14297 .iterations(1) 14298 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14299 } 14300 } 14301 } 14302 } 14303 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,n_div_16)14304 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_div_16) { 14305 TEST_REQUIRES_ARM_NEON; 14306 for (uint32_t n = 32; n <= 48; n += 16) { 14307 for (size_t k = 1; k <= 80; k += 17) { 14308 GemmMicrokernelTester() 14309 .mr(1) 14310 .nr(16) 14311 .kr(2) 14312 .sr(1) 14313 .m(1) 14314 .n(n) 14315 .k(k) 14316 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14317 } 14318 } 14319 } 14320 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,n_div_16_strided_cn)14321 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) { 14322 TEST_REQUIRES_ARM_NEON; 14323 for (uint32_t n = 32; n <= 48; n += 16) { 14324 for (size_t k = 1; k <= 80; k += 17) { 14325 GemmMicrokernelTester() 14326 .mr(1) 14327 .nr(16) 14328 .kr(2) 14329 .sr(1) 14330 .m(1) 14331 .n(n) 14332 .k(k) 14333 .cn_stride(19) 14334 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14335 } 14336 } 14337 } 14338 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,n_div_16_subtile)14339 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_div_16_subtile) { 14340 TEST_REQUIRES_ARM_NEON; 14341 for (uint32_t n = 32; n <= 48; n += 16) { 14342 for (size_t k = 1; k <= 80; k += 17) { 14343 for (uint32_t m = 1; m <= 1; m++) { 14344 GemmMicrokernelTester() 14345 .mr(1) 14346 .nr(16) 14347 .kr(2) 14348 .sr(1) 14349 
.m(m) 14350 .n(n) 14351 .k(k) 14352 .iterations(1) 14353 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14354 } 14355 } 14356 } 14357 } 14358 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,small_kernel)14359 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, small_kernel) { 14360 TEST_REQUIRES_ARM_NEON; 14361 for (size_t k = 1; k <= 80; k += 17) { 14362 GemmMicrokernelTester() 14363 .mr(1) 14364 .nr(16) 14365 .kr(2) 14366 .sr(1) 14367 .m(1) 14368 .n(16) 14369 .k(k) 14370 .ks(3) 14371 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14372 } 14373 } 14374 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,small_kernel_subtile)14375 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, small_kernel_subtile) { 14376 TEST_REQUIRES_ARM_NEON; 14377 for (size_t k = 1; k <= 80; k += 17) { 14378 for (uint32_t n = 1; n <= 16; n++) { 14379 for (uint32_t m = 1; m <= 1; m++) { 14380 GemmMicrokernelTester() 14381 .mr(1) 14382 .nr(16) 14383 .kr(2) 14384 .sr(1) 14385 .m(m) 14386 .n(n) 14387 .k(k) 14388 .ks(3) 14389 .iterations(1) 14390 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14391 } 14392 } 14393 } 14394 } 14395 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,n_gt_16_small_kernel)14396 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_gt_16_small_kernel) { 14397 TEST_REQUIRES_ARM_NEON; 14398 for (uint32_t n = 17; n < 32; n++) { 14399 for (size_t k = 1; k <= 80; k += 17) { 14400 GemmMicrokernelTester() 14401 .mr(1) 14402 .nr(16) 14403 .kr(2) 14404 .sr(1) 14405 .m(1) 14406 .n(n) 14407 .k(k) 14408 .ks(3) 14409 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14410 } 14411 } 14412 } 14413 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,n_div_16_small_kernel)14414 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_div_16_small_kernel) { 14415 TEST_REQUIRES_ARM_NEON; 14416 for (uint32_t n = 32; n <= 48; n += 16) { 14417 for (size_t k = 1; k <= 80; k += 17) { 14418 GemmMicrokernelTester() 14419 .mr(1) 14420 .nr(16) 14421 .kr(2) 14422 .sr(1) 14423 .m(1) 14424 .n(n) 14425 .k(k) 14426 .ks(3) 14427 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14428 } 14429 } 14430 } 14431 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,strided_cm_subtile)14432 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, strided_cm_subtile) { 14433 TEST_REQUIRES_ARM_NEON; 14434 for (size_t k = 1; k <= 80; k += 17) { 14435 for (uint32_t n = 1; n <= 16; n++) { 14436 for (uint32_t m = 1; m <= 1; m++) { 14437 GemmMicrokernelTester() 14438 .mr(1) 14439 .nr(16) 14440 .kr(2) 14441 .sr(1) 14442 .m(m) 14443 .n(n) 14444 .k(k) 14445 .cm_stride(19) 14446 .iterations(1) 14447 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14448 } 14449 } 14450 } 14451 } 14452 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,a_offset)14453 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, a_offset) { 14454 TEST_REQUIRES_ARM_NEON; 14455 for (size_t k = 1; k <= 80; k += 17) { 14456 GemmMicrokernelTester() 14457 .mr(1) 14458 .nr(16) 14459 .kr(2) 14460 .sr(1) 14461 .m(1) 14462 .n(16) 14463 .k(k) 14464 .ks(3) 14465 .a_offset(83) 14466 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14467 } 14468 } 14469 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,zero)14470 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, zero) { 14471 TEST_REQUIRES_ARM_NEON; 14472 for (size_t k = 1; k <= 80; k += 17) { 14473 for (uint32_t mz = 0; mz < 1; mz++) { 14474 
GemmMicrokernelTester() 14475 .mr(1) 14476 .nr(16) 14477 .kr(2) 14478 .sr(1) 14479 .m(1) 14480 .n(16) 14481 .k(k) 14482 .ks(3) 14483 .a_offset(83) 14484 .zero_index(mz) 14485 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14486 } 14487 } 14488 } 14489 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,qmin)14490 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, qmin) { 14491 TEST_REQUIRES_ARM_NEON; 14492 GemmMicrokernelTester() 14493 .mr(1) 14494 .nr(16) 14495 .kr(2) 14496 .sr(1) 14497 .m(1) 14498 .n(16) 14499 .k(16) 14500 .qmin(128) 14501 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14502 } 14503 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,qmax)14504 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, qmax) { 14505 TEST_REQUIRES_ARM_NEON; 14506 GemmMicrokernelTester() 14507 .mr(1) 14508 .nr(16) 14509 .kr(2) 14510 .sr(1) 14511 .m(1) 14512 .n(16) 14513 .k(16) 14514 .qmax(128) 14515 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14516 } 14517 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP,strided_cm)14518 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, strided_cm) { 14519 TEST_REQUIRES_ARM_NEON; 14520 GemmMicrokernelTester() 14521 .mr(1) 14522 .nr(16) 14523 .kr(2) 14524 .sr(1) 14525 .m(1) 14526 .n(16) 14527 .k(16) 14528 .cm_stride(19) 14529 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14530 } 14531 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 14532 14533 14534 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,k_eq_16)14535 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_eq_16) { 14536 TEST_REQUIRES_ARM_NEON; 14537 GemmMicrokernelTester() 14538 .mr(2) 14539 
.nr(8) 14540 .kr(8) 14541 .sr(1) 14542 .m(2) 14543 .n(8) 14544 .k(16) 14545 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14546 } 14547 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,strided_cn)14548 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, strided_cn) { 14549 TEST_REQUIRES_ARM_NEON; 14550 GemmMicrokernelTester() 14551 .mr(2) 14552 .nr(8) 14553 .kr(8) 14554 .sr(1) 14555 .m(2) 14556 .n(8) 14557 .k(16) 14558 .cn_stride(11) 14559 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14560 } 14561 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,k_eq_16_subtile)14562 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) { 14563 TEST_REQUIRES_ARM_NEON; 14564 for (uint32_t n = 1; n <= 8; n++) { 14565 for (uint32_t m = 1; m <= 2; m++) { 14566 GemmMicrokernelTester() 14567 .mr(2) 14568 .nr(8) 14569 .kr(8) 14570 .sr(1) 14571 .m(m) 14572 .n(n) 14573 .k(16) 14574 .iterations(1) 14575 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14576 } 14577 } 14578 } 14579 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,k_eq_16_subtile_m)14580 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) { 14581 TEST_REQUIRES_ARM_NEON; 14582 for (uint32_t m = 1; m <= 2; m++) { 14583 GemmMicrokernelTester() 14584 .mr(2) 14585 .nr(8) 14586 .kr(8) 14587 .sr(1) 14588 .m(m) 14589 .n(8) 14590 .k(16) 14591 .iterations(1) 14592 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14593 } 14594 } 14595 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,k_eq_16_subtile_n)14596 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) { 14597 TEST_REQUIRES_ARM_NEON; 14598 for 
(uint32_t n = 1; n <= 8; n++) { 14599 GemmMicrokernelTester() 14600 .mr(2) 14601 .nr(8) 14602 .kr(8) 14603 .sr(1) 14604 .m(2) 14605 .n(n) 14606 .k(16) 14607 .iterations(1) 14608 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14609 } 14610 } 14611 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,k_lt_16)14612 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_lt_16) { 14613 TEST_REQUIRES_ARM_NEON; 14614 for (size_t k = 1; k < 16; k++) { 14615 GemmMicrokernelTester() 14616 .mr(2) 14617 .nr(8) 14618 .kr(8) 14619 .sr(1) 14620 .m(2) 14621 .n(8) 14622 .k(k) 14623 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14624 } 14625 } 14626 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,k_lt_16_subtile)14627 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) { 14628 TEST_REQUIRES_ARM_NEON; 14629 for (size_t k = 1; k < 16; k++) { 14630 for (uint32_t n = 1; n <= 8; n++) { 14631 for (uint32_t m = 1; m <= 2; m++) { 14632 GemmMicrokernelTester() 14633 .mr(2) 14634 .nr(8) 14635 .kr(8) 14636 .sr(1) 14637 .m(m) 14638 .n(n) 14639 .k(k) 14640 .iterations(1) 14641 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14642 } 14643 } 14644 } 14645 } 14646 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,k_gt_16)14647 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_gt_16) { 14648 TEST_REQUIRES_ARM_NEON; 14649 for (size_t k = 17; k < 32; k++) { 14650 GemmMicrokernelTester() 14651 .mr(2) 14652 .nr(8) 14653 .kr(8) 14654 .sr(1) 14655 .m(2) 14656 .n(8) 14657 .k(k) 14658 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14659 } 14660 } 14661 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,k_gt_16_subtile)14662 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) { 14663 TEST_REQUIRES_ARM_NEON; 14664 for (size_t k = 17; k < 32; k++) { 14665 for (uint32_t n = 1; n <= 8; n++) { 14666 for (uint32_t m = 1; m <= 2; m++) { 14667 GemmMicrokernelTester() 14668 .mr(2) 14669 .nr(8) 14670 .kr(8) 14671 .sr(1) 14672 .m(m) 14673 .n(n) 14674 .k(k) 14675 .iterations(1) 14676 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14677 } 14678 } 14679 } 14680 } 14681 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,k_div_16)14682 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_div_16) { 14683 TEST_REQUIRES_ARM_NEON; 14684 for (size_t k = 32; k <= 160; k += 16) { 14685 GemmMicrokernelTester() 14686 .mr(2) 14687 .nr(8) 14688 .kr(8) 14689 .sr(1) 14690 .m(2) 14691 .n(8) 14692 .k(k) 14693 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14694 } 14695 } 14696 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,k_div_16_subtile)14697 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) { 14698 TEST_REQUIRES_ARM_NEON; 14699 for (size_t k = 32; k <= 160; k += 16) { 14700 for (uint32_t n = 1; n <= 8; n++) { 14701 for (uint32_t m = 1; m <= 2; m++) { 14702 GemmMicrokernelTester() 14703 .mr(2) 14704 .nr(8) 14705 .kr(8) 14706 .sr(1) 14707 .m(m) 14708 .n(n) 14709 .k(k) 14710 .iterations(1) 14711 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14712 } 14713 } 14714 } 14715 } 14716 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,n_gt_8)14717 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_gt_8) { 14718 TEST_REQUIRES_ARM_NEON; 14719 for (uint32_t n = 9; n < 16; n++) { 14720 for (size_t k = 1; k <= 80; k += 17) { 
14721 GemmMicrokernelTester() 14722 .mr(2) 14723 .nr(8) 14724 .kr(8) 14725 .sr(1) 14726 .m(2) 14727 .n(n) 14728 .k(k) 14729 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14730 } 14731 } 14732 } 14733 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,n_gt_8_strided_cn)14734 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) { 14735 TEST_REQUIRES_ARM_NEON; 14736 for (uint32_t n = 9; n < 16; n++) { 14737 for (size_t k = 1; k <= 80; k += 17) { 14738 GemmMicrokernelTester() 14739 .mr(2) 14740 .nr(8) 14741 .kr(8) 14742 .sr(1) 14743 .m(2) 14744 .n(n) 14745 .k(k) 14746 .cn_stride(11) 14747 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14748 } 14749 } 14750 } 14751 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,n_gt_8_subtile)14752 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) { 14753 TEST_REQUIRES_ARM_NEON; 14754 for (uint32_t n = 9; n < 16; n++) { 14755 for (size_t k = 1; k <= 80; k += 17) { 14756 for (uint32_t m = 1; m <= 2; m++) { 14757 GemmMicrokernelTester() 14758 .mr(2) 14759 .nr(8) 14760 .kr(8) 14761 .sr(1) 14762 .m(m) 14763 .n(n) 14764 .k(k) 14765 .iterations(1) 14766 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14767 } 14768 } 14769 } 14770 } 14771 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,n_div_8)14772 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_div_8) { 14773 TEST_REQUIRES_ARM_NEON; 14774 for (uint32_t n = 16; n <= 24; n += 8) { 14775 for (size_t k = 1; k <= 80; k += 17) { 14776 GemmMicrokernelTester() 14777 .mr(2) 14778 .nr(8) 14779 .kr(8) 14780 .sr(1) 14781 .m(2) 14782 .n(n) 14783 .k(k) 14784 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 14785 } 14786 } 14787 } 14788 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,n_div_8_strided_cn)14789 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) { 14790 TEST_REQUIRES_ARM_NEON; 14791 for (uint32_t n = 16; n <= 24; n += 8) { 14792 for (size_t k = 1; k <= 80; k += 17) { 14793 GemmMicrokernelTester() 14794 .mr(2) 14795 .nr(8) 14796 .kr(8) 14797 .sr(1) 14798 .m(2) 14799 .n(n) 14800 .k(k) 14801 .cn_stride(11) 14802 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14803 } 14804 } 14805 } 14806 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,n_div_8_subtile)14807 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) { 14808 TEST_REQUIRES_ARM_NEON; 14809 for (uint32_t n = 16; n <= 24; n += 8) { 14810 for (size_t k = 1; k <= 80; k += 17) { 14811 for (uint32_t m = 1; m <= 2; m++) { 14812 GemmMicrokernelTester() 14813 .mr(2) 14814 .nr(8) 14815 .kr(8) 14816 .sr(1) 14817 .m(m) 14818 .n(n) 14819 .k(k) 14820 .iterations(1) 14821 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14822 } 14823 } 14824 } 14825 } 14826 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,small_kernel)14827 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, small_kernel) { 14828 TEST_REQUIRES_ARM_NEON; 14829 for (size_t k = 1; k <= 80; k += 17) { 14830 GemmMicrokernelTester() 14831 .mr(2) 14832 .nr(8) 14833 .kr(8) 14834 .sr(1) 14835 .m(2) 14836 .n(8) 14837 .k(k) 14838 .ks(3) 14839 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14840 } 14841 } 14842 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,small_kernel_subtile)14843 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, small_kernel_subtile) { 14844 TEST_REQUIRES_ARM_NEON; 14845 for (size_t 
k = 1; k <= 80; k += 17) { 14846 for (uint32_t n = 1; n <= 8; n++) { 14847 for (uint32_t m = 1; m <= 2; m++) { 14848 GemmMicrokernelTester() 14849 .mr(2) 14850 .nr(8) 14851 .kr(8) 14852 .sr(1) 14853 .m(m) 14854 .n(n) 14855 .k(k) 14856 .ks(3) 14857 .iterations(1) 14858 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14859 } 14860 } 14861 } 14862 } 14863 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,n_gt_8_small_kernel)14864 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_gt_8_small_kernel) { 14865 TEST_REQUIRES_ARM_NEON; 14866 for (uint32_t n = 9; n < 16; n++) { 14867 for (size_t k = 1; k <= 80; k += 17) { 14868 GemmMicrokernelTester() 14869 .mr(2) 14870 .nr(8) 14871 .kr(8) 14872 .sr(1) 14873 .m(2) 14874 .n(n) 14875 .k(k) 14876 .ks(3) 14877 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14878 } 14879 } 14880 } 14881 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,n_div_8_small_kernel)14882 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, n_div_8_small_kernel) { 14883 TEST_REQUIRES_ARM_NEON; 14884 for (uint32_t n = 16; n <= 24; n += 8) { 14885 for (size_t k = 1; k <= 80; k += 17) { 14886 GemmMicrokernelTester() 14887 .mr(2) 14888 .nr(8) 14889 .kr(8) 14890 .sr(1) 14891 .m(2) 14892 .n(n) 14893 .k(k) 14894 .ks(3) 14895 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14896 } 14897 } 14898 } 14899 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,strided_cm_subtile)14900 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) { 14901 TEST_REQUIRES_ARM_NEON; 14902 for (size_t k = 1; k <= 80; k += 17) { 14903 for (uint32_t n = 1; n <= 8; n++) { 14904 for (uint32_t m = 1; m <= 2; m++) { 14905 GemmMicrokernelTester() 14906 .mr(2) 14907 .nr(8) 14908 .kr(8) 14909 
.sr(1) 14910 .m(m) 14911 .n(n) 14912 .k(k) 14913 .cm_stride(11) 14914 .iterations(1) 14915 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14916 } 14917 } 14918 } 14919 } 14920 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,a_offset)14921 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, a_offset) { 14922 TEST_REQUIRES_ARM_NEON; 14923 for (size_t k = 1; k <= 80; k += 17) { 14924 GemmMicrokernelTester() 14925 .mr(2) 14926 .nr(8) 14927 .kr(8) 14928 .sr(1) 14929 .m(2) 14930 .n(8) 14931 .k(k) 14932 .ks(3) 14933 .a_offset(163) 14934 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14935 } 14936 } 14937 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,zero)14938 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, zero) { 14939 TEST_REQUIRES_ARM_NEON; 14940 for (size_t k = 1; k <= 80; k += 17) { 14941 for (uint32_t mz = 0; mz < 2; mz++) { 14942 GemmMicrokernelTester() 14943 .mr(2) 14944 .nr(8) 14945 .kr(8) 14946 .sr(1) 14947 .m(2) 14948 .n(8) 14949 .k(k) 14950 .ks(3) 14951 .a_offset(163) 14952 .zero_index(mz) 14953 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14954 } 14955 } 14956 } 14957 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,qmin)14958 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, qmin) { 14959 TEST_REQUIRES_ARM_NEON; 14960 GemmMicrokernelTester() 14961 .mr(2) 14962 .nr(8) 14963 .kr(8) 14964 .sr(1) 14965 .m(2) 14966 .n(8) 14967 .k(16) 14968 .qmin(128) 14969 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14970 } 14971 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,qmax)14972 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, qmax) { 14973 TEST_REQUIRES_ARM_NEON; 14974 
GemmMicrokernelTester() 14975 .mr(2) 14976 .nr(8) 14977 .kr(8) 14978 .sr(1) 14979 .m(2) 14980 .n(8) 14981 .k(16) 14982 .qmax(128) 14983 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14984 } 14985 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL,strided_cm)14986 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL, strided_cm) { 14987 TEST_REQUIRES_ARM_NEON; 14988 GemmMicrokernelTester() 14989 .mr(2) 14990 .nr(8) 14991 .kr(8) 14992 .sr(1) 14993 .m(2) 14994 .n(8) 14995 .k(16) 14996 .cm_stride(11) 14997 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14998 } 14999 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 15000 15001 15002 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,k_eq_16)15003 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_eq_16) { 15004 TEST_REQUIRES_ARM_NEON; 15005 GemmMicrokernelTester() 15006 .mr(1) 15007 .nr(8) 15008 .kr(8) 15009 .sr(1) 15010 .m(1) 15011 .n(8) 15012 .k(16) 15013 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15014 } 15015 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,strided_cn)15016 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, strided_cn) { 15017 TEST_REQUIRES_ARM_NEON; 15018 GemmMicrokernelTester() 15019 .mr(1) 15020 .nr(8) 15021 .kr(8) 15022 .sr(1) 15023 .m(1) 15024 .n(8) 15025 .k(16) 15026 .cn_stride(11) 15027 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15028 } 15029 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,k_eq_16_subtile)15030 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) { 15031 TEST_REQUIRES_ARM_NEON; 15032 for (uint32_t n = 1; n <= 8; n++) { 15033 for 
(uint32_t m = 1; m <= 1; m++) { 15034 GemmMicrokernelTester() 15035 .mr(1) 15036 .nr(8) 15037 .kr(8) 15038 .sr(1) 15039 .m(m) 15040 .n(n) 15041 .k(16) 15042 .iterations(1) 15043 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15044 } 15045 } 15046 } 15047 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,k_eq_16_subtile_m)15048 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) { 15049 TEST_REQUIRES_ARM_NEON; 15050 for (uint32_t m = 1; m <= 1; m++) { 15051 GemmMicrokernelTester() 15052 .mr(1) 15053 .nr(8) 15054 .kr(8) 15055 .sr(1) 15056 .m(m) 15057 .n(8) 15058 .k(16) 15059 .iterations(1) 15060 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15061 } 15062 } 15063 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,k_eq_16_subtile_n)15064 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) { 15065 TEST_REQUIRES_ARM_NEON; 15066 for (uint32_t n = 1; n <= 8; n++) { 15067 GemmMicrokernelTester() 15068 .mr(1) 15069 .nr(8) 15070 .kr(8) 15071 .sr(1) 15072 .m(1) 15073 .n(n) 15074 .k(16) 15075 .iterations(1) 15076 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15077 } 15078 } 15079 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,k_lt_16)15080 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_lt_16) { 15081 TEST_REQUIRES_ARM_NEON; 15082 for (size_t k = 1; k < 16; k++) { 15083 GemmMicrokernelTester() 15084 .mr(1) 15085 .nr(8) 15086 .kr(8) 15087 .sr(1) 15088 .m(1) 15089 .n(8) 15090 .k(k) 15091 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15092 } 15093 } 15094 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,k_lt_16_subtile)15095 
// Sweep: k = 1..15, n = 1..8, m = 1..1; iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweep: k = 17..31; m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweep: k = 17..31, n = 1..8, m = 1..1; iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweep: k = 32..160 step 16; m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweep: k = 32..160 step 16, n = 1..8, m = 1..1; iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweep: n = 9..15, k in {1, 18, 35, 52, 69}; m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweep: n = 9..15, k in {1, 18, 35, 52, 69}; m = 1, cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) { 15221 TEST_REQUIRES_ARM_NEON; 15222 for (uint32_t n = 9; n < 16; n++) { 15223 for (size_t k = 1; k <= 80; k += 17) { 15224 for (uint32_t m = 1; m <= 1; m++) { 15225 GemmMicrokernelTester() 15226 .mr(1) 15227 .nr(8) 15228 .kr(8) 15229 .sr(1) 15230 .m(m) 15231 .n(n) 15232 .k(k) 15233 .iterations(1) 15234 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15235 } 15236 } 15237 } 15238 } 15239 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,n_div_8)15240 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_div_8) { 15241 TEST_REQUIRES_ARM_NEON; 15242 for (uint32_t n = 16; n <= 24; n += 8) { 15243 for (size_t k = 1; k <= 80; k += 17) { 15244 GemmMicrokernelTester() 15245 .mr(1) 15246 .nr(8) 15247 .kr(8) 15248 .sr(1) 15249 .m(1) 15250 .n(n) 15251 .k(k) 15252 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15253 } 15254 } 15255 } 15256 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,n_div_8_strided_cn)15257 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) { 15258 TEST_REQUIRES_ARM_NEON; 15259 for (uint32_t n = 16; n <= 24; n += 8) { 15260 for (size_t k = 1; k <= 80; k += 17) { 15261 GemmMicrokernelTester() 15262 .mr(1) 15263 .nr(8) 15264 .kr(8) 15265 .sr(1) 15266 .m(1) 15267 .n(n) 15268 .k(k) 15269 .cn_stride(11) 15270 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15271 } 15272 } 15273 } 15274 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,n_div_8_subtile)15275 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) { 15276 TEST_REQUIRES_ARM_NEON; 15277 for (uint32_t n = 16; n <= 24; n += 8) { 15278 for (size_t k = 1; k <= 80; k += 17) { 15279 for (uint32_t m = 1; m <= 1; m++) { 
15280 GemmMicrokernelTester() 15281 .mr(1) 15282 .nr(8) 15283 .kr(8) 15284 .sr(1) 15285 .m(m) 15286 .n(n) 15287 .k(k) 15288 .iterations(1) 15289 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15290 } 15291 } 15292 } 15293 } 15294 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,small_kernel)15295 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, small_kernel) { 15296 TEST_REQUIRES_ARM_NEON; 15297 for (size_t k = 1; k <= 80; k += 17) { 15298 GemmMicrokernelTester() 15299 .mr(1) 15300 .nr(8) 15301 .kr(8) 15302 .sr(1) 15303 .m(1) 15304 .n(8) 15305 .k(k) 15306 .ks(3) 15307 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15308 } 15309 } 15310 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,small_kernel_subtile)15311 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, small_kernel_subtile) { 15312 TEST_REQUIRES_ARM_NEON; 15313 for (size_t k = 1; k <= 80; k += 17) { 15314 for (uint32_t n = 1; n <= 8; n++) { 15315 for (uint32_t m = 1; m <= 1; m++) { 15316 GemmMicrokernelTester() 15317 .mr(1) 15318 .nr(8) 15319 .kr(8) 15320 .sr(1) 15321 .m(m) 15322 .n(n) 15323 .k(k) 15324 .ks(3) 15325 .iterations(1) 15326 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15327 } 15328 } 15329 } 15330 } 15331 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,n_gt_8_small_kernel)15332 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_gt_8_small_kernel) { 15333 TEST_REQUIRES_ARM_NEON; 15334 for (uint32_t n = 9; n < 16; n++) { 15335 for (size_t k = 1; k <= 80; k += 17) { 15336 GemmMicrokernelTester() 15337 .mr(1) 15338 .nr(8) 15339 .kr(8) 15340 .sr(1) 15341 .m(1) 15342 .n(n) 15343 .k(k) 15344 .ks(3) 15345 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15346 } 15347 } 15348 } 15349 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,n_div_8_small_kernel)15350 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, n_div_8_small_kernel) { 15351 TEST_REQUIRES_ARM_NEON; 15352 for (uint32_t n = 16; n <= 24; n += 8) { 15353 for (size_t k = 1; k <= 80; k += 17) { 15354 GemmMicrokernelTester() 15355 .mr(1) 15356 .nr(8) 15357 .kr(8) 15358 .sr(1) 15359 .m(1) 15360 .n(n) 15361 .k(k) 15362 .ks(3) 15363 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15364 } 15365 } 15366 } 15367 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,strided_cm_subtile)15368 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) { 15369 TEST_REQUIRES_ARM_NEON; 15370 for (size_t k = 1; k <= 80; k += 17) { 15371 for (uint32_t n = 1; n <= 8; n++) { 15372 for (uint32_t m = 1; m <= 1; m++) { 15373 GemmMicrokernelTester() 15374 .mr(1) 15375 .nr(8) 15376 .kr(8) 15377 .sr(1) 15378 .m(m) 15379 .n(n) 15380 .k(k) 15381 .cm_stride(11) 15382 .iterations(1) 15383 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15384 } 15385 } 15386 } 15387 } 15388 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,a_offset)15389 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, a_offset) { 15390 TEST_REQUIRES_ARM_NEON; 15391 for (size_t k = 1; k <= 80; k += 17) { 15392 GemmMicrokernelTester() 15393 .mr(1) 15394 .nr(8) 15395 .kr(8) 15396 .sr(1) 15397 .m(1) 15398 .n(8) 15399 .k(k) 15400 .ks(3) 15401 .a_offset(83) 15402 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15403 } 15404 } 15405 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,zero)15406 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, zero) { 15407 
TEST_REQUIRES_ARM_NEON; 15408 for (size_t k = 1; k <= 80; k += 17) { 15409 for (uint32_t mz = 0; mz < 1; mz++) { 15410 GemmMicrokernelTester() 15411 .mr(1) 15412 .nr(8) 15413 .kr(8) 15414 .sr(1) 15415 .m(1) 15416 .n(8) 15417 .k(k) 15418 .ks(3) 15419 .a_offset(83) 15420 .zero_index(mz) 15421 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15422 } 15423 } 15424 } 15425 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,qmin)15426 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, qmin) { 15427 TEST_REQUIRES_ARM_NEON; 15428 GemmMicrokernelTester() 15429 .mr(1) 15430 .nr(8) 15431 .kr(8) 15432 .sr(1) 15433 .m(1) 15434 .n(8) 15435 .k(16) 15436 .qmin(128) 15437 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15438 } 15439 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,qmax)15440 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, qmax) { 15441 TEST_REQUIRES_ARM_NEON; 15442 GemmMicrokernelTester() 15443 .mr(1) 15444 .nr(8) 15445 .kr(8) 15446 .sr(1) 15447 .m(1) 15448 .n(8) 15449 .k(16) 15450 .qmax(128) 15451 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15452 } 15453 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL,strided_cm)15454 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL, strided_cm) { 15455 TEST_REQUIRES_ARM_NEON; 15456 GemmMicrokernelTester() 15457 .mr(1) 15458 .nr(8) 15459 .kr(8) 15460 .sr(1) 15461 .m(1) 15462 .n(8) 15463 .k(16) 15464 .cm_stride(11) 15465 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15466 } 15467 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 15468 15469 15470 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 
// m=4, n=16 (equal to mr and nr) at k=8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// m=4, n=16, k=8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Every m in [1,4] and n in [1,16] at k=8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every m in [1,4] with n fixed at 16 and k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every n in [1,16] with m fixed at 4 and k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// m=4, n=16 for every k in [1,7].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every m in [1,4] and n in [1,16] for every k in [1,7], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_gt_8)15583 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) { 15584 TEST_REQUIRES_ARM_NEON; 15585 for (size_t k = 9; k < 16; k++) { 15586 GemmMicrokernelTester() 15587 .mr(4) 15588 .nr(16) 15589 .kr(1) 15590 .sr(1) 15591 .m(4) 15592 .n(16) 15593 .k(k) 15594 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15595 } 15596 } 15597 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_gt_8_subtile)15598 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) { 15599 TEST_REQUIRES_ARM_NEON; 15600 for (size_t k = 9; k < 16; k++) { 15601 for (uint32_t n = 1; n <= 16; n++) { 15602 for (uint32_t m = 1; m <= 4; m++) { 15603 GemmMicrokernelTester() 15604 .mr(4) 15605 .nr(16) 15606 .kr(1) 15607 .sr(1) 15608 .m(m) 15609 .n(n) 15610 .k(k) 15611 .iterations(1) 15612 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15613 } 15614 } 15615 } 15616 } 15617 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_div_8)15618 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) { 15619 TEST_REQUIRES_ARM_NEON; 15620 for (size_t k = 16; k <= 80; k += 8) { 15621 GemmMicrokernelTester() 15622 .mr(4) 15623 .nr(16) 15624 .kr(1) 15625 .sr(1) 15626 .m(4) 15627 .n(16) 15628 .k(k) 15629 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15630 } 15631 } 15632 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,k_div_8_subtile)15633 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) { 15634 TEST_REQUIRES_ARM_NEON; 15635 for (size_t k = 16; k <= 80; k += 8) { 15636 for 
(uint32_t n = 1; n <= 16; n++) { 15637 for (uint32_t m = 1; m <= 4; m++) { 15638 GemmMicrokernelTester() 15639 .mr(4) 15640 .nr(16) 15641 .kr(1) 15642 .sr(1) 15643 .m(m) 15644 .n(n) 15645 .k(k) 15646 .iterations(1) 15647 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15648 } 15649 } 15650 } 15651 } 15652 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,n_gt_16)15653 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) { 15654 TEST_REQUIRES_ARM_NEON; 15655 for (uint32_t n = 17; n < 32; n++) { 15656 for (size_t k = 1; k <= 40; k += 9) { 15657 GemmMicrokernelTester() 15658 .mr(4) 15659 .nr(16) 15660 .kr(1) 15661 .sr(1) 15662 .m(4) 15663 .n(n) 15664 .k(k) 15665 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15666 } 15667 } 15668 } 15669 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,n_gt_16_strided_cn)15670 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) { 15671 TEST_REQUIRES_ARM_NEON; 15672 for (uint32_t n = 17; n < 32; n++) { 15673 for (size_t k = 1; k <= 40; k += 9) { 15674 GemmMicrokernelTester() 15675 .mr(4) 15676 .nr(16) 15677 .kr(1) 15678 .sr(1) 15679 .m(4) 15680 .n(n) 15681 .k(k) 15682 .cn_stride(19) 15683 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15684 } 15685 } 15686 } 15687 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,n_gt_16_subtile)15688 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) { 15689 TEST_REQUIRES_ARM_NEON; 15690 for (uint32_t n = 17; n < 32; n++) { 15691 for (size_t k = 1; k <= 40; k += 9) { 15692 for (uint32_t m = 1; m <= 4; m++) { 15693 GemmMicrokernelTester() 
15694 .mr(4) 15695 .nr(16) 15696 .kr(1) 15697 .sr(1) 15698 .m(m) 15699 .n(n) 15700 .k(k) 15701 .iterations(1) 15702 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15703 } 15704 } 15705 } 15706 } 15707 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,n_div_16)15708 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) { 15709 TEST_REQUIRES_ARM_NEON; 15710 for (uint32_t n = 32; n <= 48; n += 16) { 15711 for (size_t k = 1; k <= 40; k += 9) { 15712 GemmMicrokernelTester() 15713 .mr(4) 15714 .nr(16) 15715 .kr(1) 15716 .sr(1) 15717 .m(4) 15718 .n(n) 15719 .k(k) 15720 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15721 } 15722 } 15723 } 15724 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,n_div_16_strided_cn)15725 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) { 15726 TEST_REQUIRES_ARM_NEON; 15727 for (uint32_t n = 32; n <= 48; n += 16) { 15728 for (size_t k = 1; k <= 40; k += 9) { 15729 GemmMicrokernelTester() 15730 .mr(4) 15731 .nr(16) 15732 .kr(1) 15733 .sr(1) 15734 .m(4) 15735 .n(n) 15736 .k(k) 15737 .cn_stride(19) 15738 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15739 } 15740 } 15741 } 15742 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,n_div_16_subtile)15743 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) { 15744 TEST_REQUIRES_ARM_NEON; 15745 for (uint32_t n = 32; n <= 48; n += 16) { 15746 for (size_t k = 1; k <= 40; k += 9) { 15747 for (uint32_t m = 1; m <= 4; m++) { 15748 GemmMicrokernelTester() 15749 .mr(4) 15750 .nr(16) 15751 .kr(1) 15752 .sr(1) 15753 .m(m) 15754 .n(n) 15755 
.k(k) 15756 .iterations(1) 15757 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15758 } 15759 } 15760 } 15761 } 15762 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,small_kernel)15763 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel) { 15764 TEST_REQUIRES_ARM_NEON; 15765 for (size_t k = 1; k <= 40; k += 9) { 15766 GemmMicrokernelTester() 15767 .mr(4) 15768 .nr(16) 15769 .kr(1) 15770 .sr(1) 15771 .m(4) 15772 .n(16) 15773 .k(k) 15774 .ks(3) 15775 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15776 } 15777 } 15778 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,small_kernel_subtile)15779 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel_subtile) { 15780 TEST_REQUIRES_ARM_NEON; 15781 for (size_t k = 1; k <= 40; k += 9) { 15782 for (uint32_t n = 1; n <= 16; n++) { 15783 for (uint32_t m = 1; m <= 4; m++) { 15784 GemmMicrokernelTester() 15785 .mr(4) 15786 .nr(16) 15787 .kr(1) 15788 .sr(1) 15789 .m(m) 15790 .n(n) 15791 .k(k) 15792 .ks(3) 15793 .iterations(1) 15794 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15795 } 15796 } 15797 } 15798 } 15799 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,n_gt_16_small_kernel)15800 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_small_kernel) { 15801 TEST_REQUIRES_ARM_NEON; 15802 for (uint32_t n = 17; n < 32; n++) { 15803 for (size_t k = 1; k <= 40; k += 9) { 15804 GemmMicrokernelTester() 15805 .mr(4) 15806 .nr(16) 15807 .kr(1) 15808 .sr(1) 15809 .m(4) 15810 .n(n) 15811 .k(k) 15812 .ks(3) 15813 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15814 } 15815 } 15816 } 15817 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,n_div_16_small_kernel)15818 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_small_kernel) { 15819 TEST_REQUIRES_ARM_NEON; 15820 for (uint32_t n = 32; n <= 48; n += 16) { 15821 for (size_t k = 1; k <= 40; k += 9) { 15822 GemmMicrokernelTester() 15823 .mr(4) 15824 .nr(16) 15825 .kr(1) 15826 .sr(1) 15827 .m(4) 15828 .n(n) 15829 .k(k) 15830 .ks(3) 15831 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15832 } 15833 } 15834 } 15835 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,strided_cm_subtile)15836 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) { 15837 TEST_REQUIRES_ARM_NEON; 15838 for (size_t k = 1; k <= 40; k += 9) { 15839 for (uint32_t n = 1; n <= 16; n++) { 15840 for (uint32_t m = 1; m <= 4; m++) { 15841 GemmMicrokernelTester() 15842 .mr(4) 15843 .nr(16) 15844 .kr(1) 15845 .sr(1) 15846 .m(m) 15847 .n(n) 15848 .k(k) 15849 .cm_stride(19) 15850 .iterations(1) 15851 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15852 } 15853 } 15854 } 15855 } 15856 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,a_offset)15857 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, a_offset) { 15858 TEST_REQUIRES_ARM_NEON; 15859 for (size_t k = 1; k <= 40; k += 9) { 15860 GemmMicrokernelTester() 15861 .mr(4) 15862 .nr(16) 15863 .kr(1) 15864 .sr(1) 15865 .m(4) 15866 .n(16) 15867 .k(k) 15868 .ks(3) 15869 .a_offset(163) 15870 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15871 } 15872 } 15873 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,zero)15874 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, zero) { 15875 TEST_REQUIRES_ARM_NEON; 15876 for (size_t k = 1; k <= 40; k += 9) { 15877 for (uint32_t mz = 0; mz < 4; mz++) { 15878 GemmMicrokernelTester() 15879 .mr(4) 15880 .nr(16) 15881 .kr(1) 15882 .sr(1) 15883 .m(4) 15884 .n(16) 15885 .k(k) 15886 .ks(3) 15887 .a_offset(163) 15888 .zero_index(mz) 15889 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15890 } 15891 } 15892 } 15893 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,qmin)15894 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) { 15895 TEST_REQUIRES_ARM_NEON; 15896 GemmMicrokernelTester() 15897 .mr(4) 15898 .nr(16) 15899 .kr(1) 15900 .sr(1) 15901 .m(4) 15902 .n(16) 15903 .k(8) 15904 .qmin(128) 15905 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15906 } 15907 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,qmax)15908 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) { 15909 TEST_REQUIRES_ARM_NEON; 15910 GemmMicrokernelTester() 15911 .mr(4) 15912 .nr(16) 15913 .kr(1) 15914 .sr(1) 15915 .m(4) 15916 .n(16) 15917 .k(8) 15918 .qmax(128) 15919 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15920 } 15921 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53,strided_cm)15922 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) { 15923 TEST_REQUIRES_ARM_NEON; 15924 GemmMicrokernelTester() 15925 .mr(4) 15926 .nr(16) 15927 .kr(1) 15928 
.sr(1) 15929 .m(4) 15930 .n(16) 15931 .k(8) 15932 .cm_stride(19) 15933 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15934 } 15935 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 15936 15937 15938 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,k_eq_8)15939 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8) { 15940 TEST_REQUIRES_ARM_NEON_DOT; 15941 GemmMicrokernelTester() 15942 .mr(4) 15943 .nr(16) 15944 .kr(4) 15945 .sr(1) 15946 .m(4) 15947 .n(16) 15948 .k(8) 15949 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15950 } 15951 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,strided_cn)15952 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) { 15953 TEST_REQUIRES_ARM_NEON_DOT; 15954 GemmMicrokernelTester() 15955 .mr(4) 15956 .nr(16) 15957 .kr(4) 15958 .sr(1) 15959 .m(4) 15960 .n(16) 15961 .k(8) 15962 .cn_stride(19) 15963 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15964 } 15965 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,k_eq_8_subtile)15966 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) { 15967 TEST_REQUIRES_ARM_NEON_DOT; 15968 for (uint32_t n = 1; n <= 16; n++) { 15969 for (uint32_t m = 1; m <= 4; m++) { 15970 GemmMicrokernelTester() 15971 .mr(4) 15972 .nr(16) 15973 .kr(4) 15974 .sr(1) 15975 .m(m) 15976 .n(n) 15977 .k(8) 15978 .iterations(1) 15979 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15980 } 15981 } 15982 } 15983 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,k_eq_8_subtile_m)15984 
// Every m in [1,4] with n fixed at 16 and k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every n in [1,16] with m fixed at 4 and k=8, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// m=4, n=16 for every k in [1,7].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every m in [1,4] and n in [1,16] for every k in [1,7], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4, n=16 for every k in [9,15].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every m in [1,4] and n in [1,16] for every k in [9,15], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4, n=16 for k = 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every m in [1,4] and n in [1,16] for k = 16..80 in steps of 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4 with every n in [17,31], for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every m in [1,4] with every n in [17,31], for k = 1..40 in steps of 9, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4 with n in {32, 48}, for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every m in [1,4] with n in {32, 48}, for k = 1..40 in steps of 9, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4, n=16 with ks set to 3, for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every m in [1,4] and n in [1,16] with ks set to 3, for k = 1..40 in steps of 9,
// one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4 with every n in [17,31] and ks set to 3, for k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,n_div_16_small_kernel)16286 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_small_kernel) { 16287 TEST_REQUIRES_ARM_NEON_DOT; 16288 for (uint32_t n = 32; n <= 48; n += 16) { 16289 for (size_t k = 1; k <= 40; k += 9) { 16290 GemmMicrokernelTester() 16291 .mr(4) 16292 .nr(16) 16293 .kr(4) 16294 .sr(1) 16295 .m(4) 16296 .n(n) 16297 .k(k) 16298 .ks(3) 16299 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16300 } 16301 } 16302 } 16303 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,strided_cm_subtile)16304 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) { 16305 TEST_REQUIRES_ARM_NEON_DOT; 16306 for (size_t k = 1; k <= 40; k += 9) { 16307 for (uint32_t n = 1; n <= 16; n++) { 16308 for (uint32_t m = 1; m <= 4; m++) { 16309 GemmMicrokernelTester() 16310 .mr(4) 16311 .nr(16) 16312 .kr(4) 16313 .sr(1) 16314 .m(m) 16315 .n(n) 16316 .k(k) 16317 .cm_stride(19) 16318 .iterations(1) 16319 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16320 } 16321 } 16322 } 16323 } 16324 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,a_offset)16325 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, a_offset) { 16326 TEST_REQUIRES_ARM_NEON_DOT; 16327 for (size_t k = 1; k <= 40; k += 9) { 16328 GemmMicrokernelTester() 16329 .mr(4) 16330 .nr(16) 16331 .kr(4) 16332 .sr(1) 16333 .m(4) 16334 .n(16) 16335 .k(k) 16336 .ks(3) 16337 .a_offset(163) 16338 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16339 } 16340 } 16341 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,zero)16342 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, zero) { 16343 TEST_REQUIRES_ARM_NEON_DOT; 16344 for 
(size_t k = 1; k <= 40; k += 9) { 16345 for (uint32_t mz = 0; mz < 4; mz++) { 16346 GemmMicrokernelTester() 16347 .mr(4) 16348 .nr(16) 16349 .kr(4) 16350 .sr(1) 16351 .m(4) 16352 .n(16) 16353 .k(k) 16354 .ks(3) 16355 .a_offset(163) 16356 .zero_index(mz) 16357 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16358 } 16359 } 16360 } 16361 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,qmin)16362 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, qmin) { 16363 TEST_REQUIRES_ARM_NEON_DOT; 16364 GemmMicrokernelTester() 16365 .mr(4) 16366 .nr(16) 16367 .kr(4) 16368 .sr(1) 16369 .m(4) 16370 .n(16) 16371 .k(8) 16372 .qmin(128) 16373 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16374 } 16375 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,qmax)16376 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, qmax) { 16377 TEST_REQUIRES_ARM_NEON_DOT; 16378 GemmMicrokernelTester() 16379 .mr(4) 16380 .nr(16) 16381 .kr(4) 16382 .sr(1) 16383 .m(4) 16384 .n(16) 16385 .k(8) 16386 .qmax(128) 16387 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16388 } 16389 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64,strided_cm)16390 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) { 16391 TEST_REQUIRES_ARM_NEON_DOT; 16392 GemmMicrokernelTester() 16393 .mr(4) 16394 .nr(16) 16395 .kr(4) 16396 .sr(1) 16397 .m(4) 16398 .n(16) 16399 .k(8) 16400 .cm_stride(19) 16401 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16402 } 16403 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 16404 16405 16406 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16)16407 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) { 16408 TEST_REQUIRES_ARM_NEON_DOT; 16409 GemmMicrokernelTester() 16410 .mr(4) 16411 .nr(16) 16412 .kr(4) 16413 .sr(1) 16414 .m(4) 16415 .n(16) 16416 .k(16) 16417 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16418 } 16419 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,strided_cn)16420 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) { 16421 TEST_REQUIRES_ARM_NEON_DOT; 16422 GemmMicrokernelTester() 16423 .mr(4) 16424 .nr(16) 16425 .kr(4) 16426 .sr(1) 16427 .m(4) 16428 .n(16) 16429 .k(16) 16430 .cn_stride(19) 16431 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16432 } 16433 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile)16434 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) { 16435 TEST_REQUIRES_ARM_NEON_DOT; 16436 for (uint32_t n = 1; n <= 16; n++) { 16437 for (uint32_t m = 1; m <= 4; m++) { 16438 GemmMicrokernelTester() 16439 .mr(4) 16440 .nr(16) 16441 .kr(4) 16442 .sr(1) 16443 .m(m) 16444 .n(n) 16445 .k(16) 16446 .iterations(1) 16447 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16448 } 16449 } 16450 } 16451 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile_m)16452 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) { 16453 TEST_REQUIRES_ARM_NEON_DOT; 16454 for (uint32_t m = 1; m <= 4; m++) { 16455 GemmMicrokernelTester() 16456 .mr(4) 16457 .nr(16) 16458 .kr(4) 16459 .sr(1) 16460 .m(m) 16461 .n(16) 16462 .k(16) 16463 .iterations(1) 16464 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16465 } 16466 } 16467 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_eq_16_subtile_n)16468 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) { 16469 TEST_REQUIRES_ARM_NEON_DOT; 16470 for (uint32_t n = 1; n <= 16; n++) { 16471 GemmMicrokernelTester() 16472 .mr(4) 16473 .nr(16) 16474 .kr(4) 16475 .sr(1) 16476 .m(4) 16477 .n(n) 16478 .k(16) 16479 .iterations(1) 16480 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16481 } 16482 } 16483 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_lt_16)16484 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) { 16485 TEST_REQUIRES_ARM_NEON_DOT; 16486 for (size_t k = 1; k < 16; k++) { 16487 GemmMicrokernelTester() 16488 .mr(4) 16489 .nr(16) 16490 .kr(4) 16491 .sr(1) 16492 .m(4) 16493 .n(16) 16494 .k(k) 16495 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16496 } 16497 } 16498 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_lt_16_subtile)16499 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) { 16500 TEST_REQUIRES_ARM_NEON_DOT; 16501 for (size_t k = 1; k < 16; k++) { 16502 for (uint32_t n = 1; n <= 16; n++) { 16503 for (uint32_t m = 1; m <= 4; m++) { 16504 GemmMicrokernelTester() 16505 .mr(4) 16506 .nr(16) 16507 .kr(4) 16508 .sr(1) 16509 .m(m) 16510 .n(n) 16511 .k(k) 16512 .iterations(1) 16513 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16514 } 16515 } 16516 } 16517 } 16518 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_gt_16)16519 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, 
k_gt_16) { 16520 TEST_REQUIRES_ARM_NEON_DOT; 16521 for (size_t k = 17; k < 32; k++) { 16522 GemmMicrokernelTester() 16523 .mr(4) 16524 .nr(16) 16525 .kr(4) 16526 .sr(1) 16527 .m(4) 16528 .n(16) 16529 .k(k) 16530 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16531 } 16532 } 16533 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_gt_16_subtile)16534 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) { 16535 TEST_REQUIRES_ARM_NEON_DOT; 16536 for (size_t k = 17; k < 32; k++) { 16537 for (uint32_t n = 1; n <= 16; n++) { 16538 for (uint32_t m = 1; m <= 4; m++) { 16539 GemmMicrokernelTester() 16540 .mr(4) 16541 .nr(16) 16542 .kr(4) 16543 .sr(1) 16544 .m(m) 16545 .n(n) 16546 .k(k) 16547 .iterations(1) 16548 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16549 } 16550 } 16551 } 16552 } 16553 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_div_16)16554 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) { 16555 TEST_REQUIRES_ARM_NEON_DOT; 16556 for (size_t k = 32; k <= 160; k += 16) { 16557 GemmMicrokernelTester() 16558 .mr(4) 16559 .nr(16) 16560 .kr(4) 16561 .sr(1) 16562 .m(4) 16563 .n(16) 16564 .k(k) 16565 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16566 } 16567 } 16568 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,k_div_16_subtile)16569 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) { 16570 TEST_REQUIRES_ARM_NEON_DOT; 16571 for (size_t k = 32; k <= 160; k += 16) { 16572 for (uint32_t n = 1; n <= 16; n++) { 16573 for (uint32_t m = 1; m <= 4; m++) { 16574 GemmMicrokernelTester() 16575 .mr(4) 16576 .nr(16) 16577 .kr(4) 16578 .sr(1) 16579 .m(m) 16580 .n(n) 16581 .k(k) 16582 
.iterations(1) 16583 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16584 } 16585 } 16586 } 16587 } 16588 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,n_gt_16)16589 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) { 16590 TEST_REQUIRES_ARM_NEON_DOT; 16591 for (uint32_t n = 17; n < 32; n++) { 16592 for (size_t k = 1; k <= 80; k += 17) { 16593 GemmMicrokernelTester() 16594 .mr(4) 16595 .nr(16) 16596 .kr(4) 16597 .sr(1) 16598 .m(4) 16599 .n(n) 16600 .k(k) 16601 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16602 } 16603 } 16604 } 16605 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,n_gt_16_strided_cn)16606 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) { 16607 TEST_REQUIRES_ARM_NEON_DOT; 16608 for (uint32_t n = 17; n < 32; n++) { 16609 for (size_t k = 1; k <= 80; k += 17) { 16610 GemmMicrokernelTester() 16611 .mr(4) 16612 .nr(16) 16613 .kr(4) 16614 .sr(1) 16615 .m(4) 16616 .n(n) 16617 .k(k) 16618 .cn_stride(19) 16619 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16620 } 16621 } 16622 } 16623 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,n_gt_16_subtile)16624 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) { 16625 TEST_REQUIRES_ARM_NEON_DOT; 16626 for (uint32_t n = 17; n < 32; n++) { 16627 for (size_t k = 1; k <= 80; k += 17) { 16628 for (uint32_t m = 1; m <= 4; m++) { 16629 GemmMicrokernelTester() 16630 .mr(4) 16631 .nr(16) 16632 .kr(4) 16633 .sr(1) 16634 .m(m) 16635 .n(n) 16636 .k(k) 16637 .iterations(1) 16638 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16639 } 16640 } 
16641 } 16642 } 16643 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,n_div_16)16644 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) { 16645 TEST_REQUIRES_ARM_NEON_DOT; 16646 for (uint32_t n = 32; n <= 48; n += 16) { 16647 for (size_t k = 1; k <= 80; k += 17) { 16648 GemmMicrokernelTester() 16649 .mr(4) 16650 .nr(16) 16651 .kr(4) 16652 .sr(1) 16653 .m(4) 16654 .n(n) 16655 .k(k) 16656 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16657 } 16658 } 16659 } 16660 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,n_div_16_strided_cn)16661 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) { 16662 TEST_REQUIRES_ARM_NEON_DOT; 16663 for (uint32_t n = 32; n <= 48; n += 16) { 16664 for (size_t k = 1; k <= 80; k += 17) { 16665 GemmMicrokernelTester() 16666 .mr(4) 16667 .nr(16) 16668 .kr(4) 16669 .sr(1) 16670 .m(4) 16671 .n(n) 16672 .k(k) 16673 .cn_stride(19) 16674 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16675 } 16676 } 16677 } 16678 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,n_div_16_subtile)16679 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) { 16680 TEST_REQUIRES_ARM_NEON_DOT; 16681 for (uint32_t n = 32; n <= 48; n += 16) { 16682 for (size_t k = 1; k <= 80; k += 17) { 16683 for (uint32_t m = 1; m <= 4; m++) { 16684 GemmMicrokernelTester() 16685 .mr(4) 16686 .nr(16) 16687 .kr(4) 16688 .sr(1) 16689 .m(m) 16690 .n(n) 16691 .k(k) 16692 .iterations(1) 16693 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16694 } 16695 } 16696 } 16697 } 16698 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,small_kernel)16699 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, 
small_kernel) { 16700 TEST_REQUIRES_ARM_NEON_DOT; 16701 for (size_t k = 1; k <= 80; k += 17) { 16702 GemmMicrokernelTester() 16703 .mr(4) 16704 .nr(16) 16705 .kr(4) 16706 .sr(1) 16707 .m(4) 16708 .n(16) 16709 .k(k) 16710 .ks(3) 16711 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16712 } 16713 } 16714 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,small_kernel_subtile)16715 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) { 16716 TEST_REQUIRES_ARM_NEON_DOT; 16717 for (size_t k = 1; k <= 80; k += 17) { 16718 for (uint32_t n = 1; n <= 16; n++) { 16719 for (uint32_t m = 1; m <= 4; m++) { 16720 GemmMicrokernelTester() 16721 .mr(4) 16722 .nr(16) 16723 .kr(4) 16724 .sr(1) 16725 .m(m) 16726 .n(n) 16727 .k(k) 16728 .ks(3) 16729 .iterations(1) 16730 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16731 } 16732 } 16733 } 16734 } 16735 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,n_gt_16_small_kernel)16736 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_small_kernel) { 16737 TEST_REQUIRES_ARM_NEON_DOT; 16738 for (uint32_t n = 17; n < 32; n++) { 16739 for (size_t k = 1; k <= 80; k += 17) { 16740 GemmMicrokernelTester() 16741 .mr(4) 16742 .nr(16) 16743 .kr(4) 16744 .sr(1) 16745 .m(4) 16746 .n(n) 16747 .k(k) 16748 .ks(3) 16749 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16750 } 16751 } 16752 } 16753 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,n_div_16_small_kernel)16754 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_small_kernel) { 16755 TEST_REQUIRES_ARM_NEON_DOT; 16756 for (uint32_t n = 32; n <= 48; n += 16) { 16757 for (size_t k = 1; k <= 80; k += 17) { 16758 GemmMicrokernelTester() 
16759 .mr(4) 16760 .nr(16) 16761 .kr(4) 16762 .sr(1) 16763 .m(4) 16764 .n(n) 16765 .k(k) 16766 .ks(3) 16767 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16768 } 16769 } 16770 } 16771 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,strided_cm_subtile)16772 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) { 16773 TEST_REQUIRES_ARM_NEON_DOT; 16774 for (size_t k = 1; k <= 80; k += 17) { 16775 for (uint32_t n = 1; n <= 16; n++) { 16776 for (uint32_t m = 1; m <= 4; m++) { 16777 GemmMicrokernelTester() 16778 .mr(4) 16779 .nr(16) 16780 .kr(4) 16781 .sr(1) 16782 .m(m) 16783 .n(n) 16784 .k(k) 16785 .cm_stride(19) 16786 .iterations(1) 16787 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16788 } 16789 } 16790 } 16791 } 16792 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,a_offset)16793 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, a_offset) { 16794 TEST_REQUIRES_ARM_NEON_DOT; 16795 for (size_t k = 1; k <= 80; k += 17) { 16796 GemmMicrokernelTester() 16797 .mr(4) 16798 .nr(16) 16799 .kr(4) 16800 .sr(1) 16801 .m(4) 16802 .n(16) 16803 .k(k) 16804 .ks(3) 16805 .a_offset(331) 16806 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16807 } 16808 } 16809 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,zero)16810 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, zero) { 16811 TEST_REQUIRES_ARM_NEON_DOT; 16812 for (size_t k = 1; k <= 80; k += 17) { 16813 for (uint32_t mz = 0; mz < 4; mz++) { 16814 GemmMicrokernelTester() 16815 .mr(4) 16816 .nr(16) 16817 .kr(4) 16818 .sr(1) 16819 .m(4) 16820 .n(16) 16821 .k(k) 16822 .ks(3) 16823 .a_offset(331) 16824 .zero_index(mz) 16825 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16826 } 16827 } 16828 } 16829 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,qmin)16830 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, qmin) { 16831 TEST_REQUIRES_ARM_NEON_DOT; 16832 GemmMicrokernelTester() 16833 .mr(4) 16834 .nr(16) 16835 .kr(4) 16836 .sr(1) 16837 .m(4) 16838 .n(16) 16839 .k(16) 16840 .qmin(128) 16841 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16842 } 16843 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,qmax)16844 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, qmax) { 16845 TEST_REQUIRES_ARM_NEON_DOT; 16846 GemmMicrokernelTester() 16847 .mr(4) 16848 .nr(16) 16849 .kr(4) 16850 .sr(1) 16851 .m(4) 16852 .n(16) 16853 .k(16) 16854 .qmax(128) 16855 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16856 } 16857 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128,strided_cm)16858 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) { 16859 TEST_REQUIRES_ARM_NEON_DOT; 16860 GemmMicrokernelTester() 16861 .mr(4) 16862 .nr(16) 16863 .kr(4) 16864 .sr(1) 16865 .m(4) 16866 .n(16) 16867 .k(16) 16868 .cm_stride(19) 16869 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16870 } 16871 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 16872 16873 16874 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,k_eq_8)16875 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8) { 16876 TEST_REQUIRES_ARM_NEON; 16877 GemmMicrokernelTester() 16878 .mr(3) 16879 .nr(8) 16880 .kr(8) 16881 .sr(1) 16882 .m(3) 16883 .n(8) 16884 .k(8) 16885 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16886 } 16887 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,strided_cn)16888 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cn) { 16889 TEST_REQUIRES_ARM_NEON; 16890 GemmMicrokernelTester() 16891 .mr(3) 16892 .nr(8) 16893 .kr(8) 16894 .sr(1) 16895 .m(3) 16896 .n(8) 16897 .k(8) 16898 .cn_stride(11) 16899 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16900 } 16901 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,k_eq_8_subtile)16902 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile) { 16903 TEST_REQUIRES_ARM_NEON; 16904 for (uint32_t n = 1; n <= 8; n++) { 16905 for (uint32_t m = 1; m <= 3; m++) { 16906 GemmMicrokernelTester() 16907 .mr(3) 16908 .nr(8) 16909 .kr(8) 16910 .sr(1) 16911 .m(m) 16912 .n(n) 16913 .k(8) 16914 .iterations(1) 16915 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16916 } 16917 } 16918 } 16919 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,k_eq_8_subtile_m)16920 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile_m) { 16921 TEST_REQUIRES_ARM_NEON; 16922 for (uint32_t m = 1; m <= 3; m++) { 16923 GemmMicrokernelTester() 16924 .mr(3) 16925 .nr(8) 16926 .kr(8) 16927 .sr(1) 16928 .m(m) 16929 .n(8) 16930 .k(8) 16931 .iterations(1) 16932 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16933 } 16934 } 16935 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,k_eq_8_subtile_n)16936 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile_n) { 16937 TEST_REQUIRES_ARM_NEON; 16938 for (uint32_t n = 1; n <= 8; n++) { 16939 GemmMicrokernelTester() 16940 .mr(3) 16941 .nr(8) 16942 .kr(8) 16943 .sr(1) 16944 .m(3) 16945 .n(n) 16946 .k(8) 16947 .iterations(1) 16948 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16949 } 16950 } 16951 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,k_lt_8)16952 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_lt_8) { 16953 TEST_REQUIRES_ARM_NEON; 16954 for (size_t k = 1; k < 8; k++) { 16955 GemmMicrokernelTester() 16956 .mr(3) 16957 .nr(8) 16958 .kr(8) 16959 .sr(1) 16960 .m(3) 16961 .n(8) 16962 .k(k) 16963 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16964 } 16965 } 16966 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,k_lt_8_subtile)16967 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_lt_8_subtile) { 16968 TEST_REQUIRES_ARM_NEON; 16969 for (size_t k = 1; k < 8; k++) { 16970 for (uint32_t n = 1; n <= 8; n++) { 16971 for (uint32_t m = 1; m <= 3; m++) { 16972 GemmMicrokernelTester() 16973 .mr(3) 16974 .nr(8) 16975 .kr(8) 16976 .sr(1) 16977 .m(m) 16978 .n(n) 16979 .k(k) 16980 .iterations(1) 16981 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16982 } 16983 } 16984 } 16985 } 16986 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,k_gt_8)16987 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_gt_8) { 16988 TEST_REQUIRES_ARM_NEON; 16989 for (size_t k = 9; k < 16; k++) { 16990 GemmMicrokernelTester() 16991 .mr(3) 16992 .nr(8) 16993 .kr(8) 16994 .sr(1) 16995 .m(3) 16996 .n(8) 16997 .k(k) 16998 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16999 } 17000 } 17001 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,k_gt_8_subtile)17002 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_gt_8_subtile) { 17003 TEST_REQUIRES_ARM_NEON; 17004 for (size_t k = 9; k < 16; k++) { 17005 for (uint32_t n = 1; n <= 8; n++) { 17006 for (uint32_t m = 1; m <= 3; m++) { 17007 GemmMicrokernelTester() 17008 .mr(3) 17009 
.nr(8) 17010 .kr(8) 17011 .sr(1) 17012 .m(m) 17013 .n(n) 17014 .k(k) 17015 .iterations(1) 17016 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17017 } 17018 } 17019 } 17020 } 17021 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,k_div_8)17022 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_div_8) { 17023 TEST_REQUIRES_ARM_NEON; 17024 for (size_t k = 16; k <= 80; k += 8) { 17025 GemmMicrokernelTester() 17026 .mr(3) 17027 .nr(8) 17028 .kr(8) 17029 .sr(1) 17030 .m(3) 17031 .n(8) 17032 .k(k) 17033 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17034 } 17035 } 17036 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,k_div_8_subtile)17037 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_div_8_subtile) { 17038 TEST_REQUIRES_ARM_NEON; 17039 for (size_t k = 16; k <= 80; k += 8) { 17040 for (uint32_t n = 1; n <= 8; n++) { 17041 for (uint32_t m = 1; m <= 3; m++) { 17042 GemmMicrokernelTester() 17043 .mr(3) 17044 .nr(8) 17045 .kr(8) 17046 .sr(1) 17047 .m(m) 17048 .n(n) 17049 .k(k) 17050 .iterations(1) 17051 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17052 } 17053 } 17054 } 17055 } 17056 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,n_gt_8)17057 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8) { 17058 TEST_REQUIRES_ARM_NEON; 17059 for (uint32_t n = 9; n < 16; n++) { 17060 for (size_t k = 1; k <= 40; k += 9) { 17061 GemmMicrokernelTester() 17062 .mr(3) 17063 .nr(8) 17064 .kr(8) 17065 .sr(1) 17066 .m(3) 17067 .n(n) 17068 .k(k) 17069 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17070 } 17071 } 17072 } 17073 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,n_gt_8_strided_cn)17074 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_strided_cn) { 17075 
TEST_REQUIRES_ARM_NEON; 17076 for (uint32_t n = 9; n < 16; n++) { 17077 for (size_t k = 1; k <= 40; k += 9) { 17078 GemmMicrokernelTester() 17079 .mr(3) 17080 .nr(8) 17081 .kr(8) 17082 .sr(1) 17083 .m(3) 17084 .n(n) 17085 .k(k) 17086 .cn_stride(11) 17087 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17088 } 17089 } 17090 } 17091 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,n_gt_8_subtile)17092 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_subtile) { 17093 TEST_REQUIRES_ARM_NEON; 17094 for (uint32_t n = 9; n < 16; n++) { 17095 for (size_t k = 1; k <= 40; k += 9) { 17096 for (uint32_t m = 1; m <= 3; m++) { 17097 GemmMicrokernelTester() 17098 .mr(3) 17099 .nr(8) 17100 .kr(8) 17101 .sr(1) 17102 .m(m) 17103 .n(n) 17104 .k(k) 17105 .iterations(1) 17106 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17107 } 17108 } 17109 } 17110 } 17111 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,n_div_8)17112 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8) { 17113 TEST_REQUIRES_ARM_NEON; 17114 for (uint32_t n = 16; n <= 24; n += 8) { 17115 for (size_t k = 1; k <= 40; k += 9) { 17116 GemmMicrokernelTester() 17117 .mr(3) 17118 .nr(8) 17119 .kr(8) 17120 .sr(1) 17121 .m(3) 17122 .n(n) 17123 .k(k) 17124 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17125 } 17126 } 17127 } 17128 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,n_div_8_strided_cn)17129 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_strided_cn) { 17130 TEST_REQUIRES_ARM_NEON; 17131 for (uint32_t n = 16; n <= 24; n += 8) { 17132 for (size_t k = 1; k <= 40; k += 9) { 17133 GemmMicrokernelTester() 17134 .mr(3) 17135 .nr(8) 17136 .kr(8) 17137 .sr(1) 17138 .m(3) 17139 .n(n) 17140 .k(k) 17141 .cn_stride(11) 17142 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17143 } 17144 } 17145 } 17146 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,n_div_8_subtile)17147 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_subtile) { 17148 TEST_REQUIRES_ARM_NEON; 17149 for (uint32_t n = 16; n <= 24; n += 8) { 17150 for (size_t k = 1; k <= 40; k += 9) { 17151 for (uint32_t m = 1; m <= 3; m++) { 17152 GemmMicrokernelTester() 17153 .mr(3) 17154 .nr(8) 17155 .kr(8) 17156 .sr(1) 17157 .m(m) 17158 .n(n) 17159 .k(k) 17160 .iterations(1) 17161 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17162 } 17163 } 17164 } 17165 } 17166 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,small_kernel)17167 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, small_kernel) { 17168 TEST_REQUIRES_ARM_NEON; 17169 for (size_t k = 1; k <= 40; k += 9) { 17170 GemmMicrokernelTester() 17171 .mr(3) 17172 .nr(8) 17173 .kr(8) 17174 .sr(1) 17175 .m(3) 17176 .n(8) 17177 .k(k) 17178 .ks(3) 17179 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17180 } 17181 } 17182 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,small_kernel_subtile)17183 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, small_kernel_subtile) { 17184 TEST_REQUIRES_ARM_NEON; 17185 for (size_t k = 1; k <= 40; k += 9) { 17186 for (uint32_t n = 1; n <= 8; n++) { 17187 for (uint32_t m = 1; m <= 3; m++) { 17188 GemmMicrokernelTester() 17189 .mr(3) 17190 .nr(8) 17191 .kr(8) 17192 .sr(1) 17193 .m(m) 17194 .n(n) 17195 .k(k) 17196 .ks(3) 17197 .iterations(1) 17198 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17199 } 17200 } 17201 } 17202 } 17203 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,n_gt_8_small_kernel)17204 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_small_kernel) { 17205 TEST_REQUIRES_ARM_NEON; 17206 
for (uint32_t n = 9; n < 16; n++) { 17207 for (size_t k = 1; k <= 40; k += 9) { 17208 GemmMicrokernelTester() 17209 .mr(3) 17210 .nr(8) 17211 .kr(8) 17212 .sr(1) 17213 .m(3) 17214 .n(n) 17215 .k(k) 17216 .ks(3) 17217 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17218 } 17219 } 17220 } 17221 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,n_div_8_small_kernel)17222 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_small_kernel) { 17223 TEST_REQUIRES_ARM_NEON; 17224 for (uint32_t n = 16; n <= 24; n += 8) { 17225 for (size_t k = 1; k <= 40; k += 9) { 17226 GemmMicrokernelTester() 17227 .mr(3) 17228 .nr(8) 17229 .kr(8) 17230 .sr(1) 17231 .m(3) 17232 .n(n) 17233 .k(k) 17234 .ks(3) 17235 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17236 } 17237 } 17238 } 17239 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,strided_cm_subtile)17240 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cm_subtile) { 17241 TEST_REQUIRES_ARM_NEON; 17242 for (size_t k = 1; k <= 40; k += 9) { 17243 for (uint32_t n = 1; n <= 8; n++) { 17244 for (uint32_t m = 1; m <= 3; m++) { 17245 GemmMicrokernelTester() 17246 .mr(3) 17247 .nr(8) 17248 .kr(8) 17249 .sr(1) 17250 .m(m) 17251 .n(n) 17252 .k(k) 17253 .cm_stride(11) 17254 .iterations(1) 17255 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17256 } 17257 } 17258 } 17259 } 17260 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,a_offset)17261 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, a_offset) { 17262 TEST_REQUIRES_ARM_NEON; 17263 for (size_t k = 1; k <= 40; k += 9) { 17264 GemmMicrokernelTester() 17265 .mr(3) 17266 .nr(8) 17267 .kr(8) 17268 .sr(1) 17269 .m(3) 17270 .n(8) 17271 .k(k) 17272 .ks(3) 17273 .a_offset(127) 17274 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17275 } 17276 } 17277 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,zero)17278 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, zero) { 17279 TEST_REQUIRES_ARM_NEON; 17280 for (size_t k = 1; k <= 40; k += 9) { 17281 for (uint32_t mz = 0; mz < 3; mz++) { 17282 GemmMicrokernelTester() 17283 .mr(3) 17284 .nr(8) 17285 .kr(8) 17286 .sr(1) 17287 .m(3) 17288 .n(8) 17289 .k(k) 17290 .ks(3) 17291 .a_offset(127) 17292 .zero_index(mz) 17293 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17294 } 17295 } 17296 } 17297 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,qmin)17298 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, qmin) { 17299 TEST_REQUIRES_ARM_NEON; 17300 GemmMicrokernelTester() 17301 .mr(3) 17302 .nr(8) 17303 .kr(8) 17304 .sr(1) 17305 .m(3) 17306 .n(8) 17307 .k(8) 17308 .qmin(128) 17309 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17310 } 17311 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,qmax)17312 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, qmax) { 17313 TEST_REQUIRES_ARM_NEON; 17314 GemmMicrokernelTester() 17315 .mr(3) 17316 .nr(8) 17317 .kr(8) 17318 .sr(1) 17319 .m(3) 17320 .n(8) 17321 .k(8) 17322 .qmax(128) 17323 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17324 } 17325 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL,strided_cm)17326 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cm) { 17327 TEST_REQUIRES_ARM_NEON; 17328 GemmMicrokernelTester() 17329 .mr(3) 17330 .nr(8) 17331 .kr(8) 17332 .sr(1) 17333 .m(3) 17334 .n(8) 17335 .k(8) 17336 .cm_stride(11) 17337 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17338 } 17339 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 17340 
17341 17342 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,k_eq_8)17343 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8) { 17344 TEST_REQUIRES_ARM_NEON; 17345 GemmMicrokernelTester() 17346 .mr(1) 17347 .nr(16) 17348 .kr(8) 17349 .sr(1) 17350 .m(1) 17351 .n(16) 17352 .k(8) 17353 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17354 } 17355 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,strided_cn)17356 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cn) { 17357 TEST_REQUIRES_ARM_NEON; 17358 GemmMicrokernelTester() 17359 .mr(1) 17360 .nr(16) 17361 .kr(8) 17362 .sr(1) 17363 .m(1) 17364 .n(16) 17365 .k(8) 17366 .cn_stride(19) 17367 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17368 } 17369 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,k_eq_8_subtile)17370 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile) { 17371 TEST_REQUIRES_ARM_NEON; 17372 for (uint32_t n = 1; n <= 16; n++) { 17373 for (uint32_t m = 1; m <= 1; m++) { 17374 GemmMicrokernelTester() 17375 .mr(1) 17376 .nr(16) 17377 .kr(8) 17378 .sr(1) 17379 .m(m) 17380 .n(n) 17381 .k(8) 17382 .iterations(1) 17383 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17384 } 17385 } 17386 } 17387 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,k_eq_8_subtile_m)17388 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile_m) { 17389 TEST_REQUIRES_ARM_NEON; 17390 for (uint32_t m = 1; m <= 1; m++) { 17391 GemmMicrokernelTester() 17392 .mr(1) 17393 .nr(16) 17394 .kr(8) 17395 .sr(1) 17396 .m(m) 17397 .n(16) 17398 .k(8) 17399 .iterations(1) 17400 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17401 } 17402 } 17403 
// k = 8, m = 1; sweeps n over [1, 16], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// m = 1, n = 16; sweeps k over [1, 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over [1, 8), n over [1, 16] and m over [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1, n = 16; sweeps k over [9, 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over [9, 16), n over [1, 16] and m over [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1, n = 16; sweeps k over 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 16, 24, ..., 80, n over [1, 16] and m over [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1; sweeps n over [17, 32) and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over [17, 32), k over 1, 10, ..., 37 and m over [1, 1], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1; sweeps n over 32, 48 and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,n_div_16_strided_cn)17597 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_strided_cn) { 17598 TEST_REQUIRES_ARM_NEON; 17599 for (uint32_t n = 32; n <= 48; n += 16) { 17600 for (size_t k = 1; k <= 40; k += 9) { 17601 GemmMicrokernelTester() 17602 .mr(1) 17603 .nr(16) 17604 .kr(8) 17605 .sr(1) 17606 .m(1) 17607 .n(n) 17608 .k(k) 17609 .cn_stride(19) 17610 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17611 } 17612 } 17613 } 17614 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,n_div_16_subtile)17615 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_subtile) { 17616 TEST_REQUIRES_ARM_NEON; 17617 for (uint32_t n = 32; n <= 48; n += 16) { 17618 for (size_t k = 1; k <= 40; k += 9) { 17619 for (uint32_t m = 1; m <= 1; m++) { 17620 GemmMicrokernelTester() 17621 .mr(1) 17622 .nr(16) 17623 .kr(8) 17624 .sr(1) 17625 .m(m) 17626 .n(n) 17627 .k(k) 17628 .iterations(1) 17629 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17630 } 17631 } 17632 } 17633 } 17634 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,small_kernel)17635 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, small_kernel) { 17636 TEST_REQUIRES_ARM_NEON; 17637 for (size_t k = 1; k <= 40; k += 9) { 17638 GemmMicrokernelTester() 17639 .mr(1) 17640 .nr(16) 17641 .kr(8) 17642 .sr(1) 17643 .m(1) 17644 .n(16) 17645 .k(k) 17646 .ks(3) 17647 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17648 } 17649 } 17650 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,small_kernel_subtile)17651 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, small_kernel_subtile) { 17652 TEST_REQUIRES_ARM_NEON; 17653 for (size_t k = 1; k <= 40; k += 9) { 17654 for (uint32_t n = 1; n <= 16; n++) { 17655 for (uint32_t m = 1; m <= 1; m++) { 17656 
GemmMicrokernelTester() 17657 .mr(1) 17658 .nr(16) 17659 .kr(8) 17660 .sr(1) 17661 .m(m) 17662 .n(n) 17663 .k(k) 17664 .ks(3) 17665 .iterations(1) 17666 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17667 } 17668 } 17669 } 17670 } 17671 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,n_gt_16_small_kernel)17672 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_gt_16_small_kernel) { 17673 TEST_REQUIRES_ARM_NEON; 17674 for (uint32_t n = 17; n < 32; n++) { 17675 for (size_t k = 1; k <= 40; k += 9) { 17676 GemmMicrokernelTester() 17677 .mr(1) 17678 .nr(16) 17679 .kr(8) 17680 .sr(1) 17681 .m(1) 17682 .n(n) 17683 .k(k) 17684 .ks(3) 17685 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17686 } 17687 } 17688 } 17689 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,n_div_16_small_kernel)17690 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, n_div_16_small_kernel) { 17691 TEST_REQUIRES_ARM_NEON; 17692 for (uint32_t n = 32; n <= 48; n += 16) { 17693 for (size_t k = 1; k <= 40; k += 9) { 17694 GemmMicrokernelTester() 17695 .mr(1) 17696 .nr(16) 17697 .kr(8) 17698 .sr(1) 17699 .m(1) 17700 .n(n) 17701 .k(k) 17702 .ks(3) 17703 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17704 } 17705 } 17706 } 17707 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,strided_cm_subtile)17708 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cm_subtile) { 17709 TEST_REQUIRES_ARM_NEON; 17710 for (size_t k = 1; k <= 40; k += 9) { 17711 for (uint32_t n = 1; n <= 16; n++) { 17712 for (uint32_t m = 1; m <= 1; m++) { 17713 GemmMicrokernelTester() 17714 .mr(1) 17715 .nr(16) 17716 .kr(8) 17717 .sr(1) 17718 .m(m) 17719 .n(n) 17720 .k(k) 17721 .cm_stride(19) 17722 .iterations(1) 17723 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17724 } 17725 } 17726 } 17727 } 17728 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,a_offset)17729 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, a_offset) { 17730 TEST_REQUIRES_ARM_NEON; 17731 for (size_t k = 1; k <= 40; k += 9) { 17732 GemmMicrokernelTester() 17733 .mr(1) 17734 .nr(16) 17735 .kr(8) 17736 .sr(1) 17737 .m(1) 17738 .n(16) 17739 .k(k) 17740 .ks(3) 17741 .a_offset(43) 17742 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17743 } 17744 } 17745 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,zero)17746 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, zero) { 17747 TEST_REQUIRES_ARM_NEON; 17748 for (size_t k = 1; k <= 40; k += 9) { 17749 for (uint32_t mz = 0; mz < 1; mz++) { 17750 GemmMicrokernelTester() 17751 .mr(1) 17752 .nr(16) 17753 .kr(8) 17754 .sr(1) 17755 .m(1) 17756 .n(16) 17757 .k(k) 17758 .ks(3) 17759 .a_offset(43) 17760 .zero_index(mz) 17761 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17762 } 17763 } 17764 } 17765 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,qmin)17766 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, qmin) { 17767 TEST_REQUIRES_ARM_NEON; 17768 GemmMicrokernelTester() 17769 .mr(1) 17770 .nr(16) 17771 .kr(8) 17772 .sr(1) 17773 .m(1) 17774 .n(16) 17775 .k(8) 17776 .qmin(128) 17777 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17778 } 17779 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,qmax)17780 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, qmax) { 17781 TEST_REQUIRES_ARM_NEON; 17782 GemmMicrokernelTester() 17783 .mr(1) 17784 .nr(16) 17785 .kr(8) 17786 .sr(1) 17787 .m(1) 17788 .n(16) 17789 .k(8) 17790 .qmax(128) 17791 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17792 } 17793 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL,strided_cm)17794 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C8__NEON_MULL, strided_cm) { 17795 TEST_REQUIRES_ARM_NEON; 17796 GemmMicrokernelTester() 17797 .mr(1) 17798 .nr(16) 17799 .kr(8) 17800 .sr(1) 17801 .m(1) 17802 .n(16) 17803 .k(8) 17804 .cm_stride(19) 17805 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17806 } 17807 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 17808 17809 17810 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,k_eq_8)17811 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, k_eq_8) { 17812 TEST_REQUIRES_ARM_NEON; 17813 GemmMicrokernelTester() 17814 .mr(2) 17815 .nr(16) 17816 .kr(8) 17817 .sr(1) 17818 .m(2) 17819 .n(16) 17820 .k(8) 17821 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17822 } 17823 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,strided_cn)17824 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, strided_cn) { 17825 TEST_REQUIRES_ARM_NEON; 17826 GemmMicrokernelTester() 17827 .mr(2) 17828 .nr(16) 17829 .kr(8) 17830 .sr(1) 17831 .m(2) 17832 .n(16) 17833 .k(8) 17834 .cn_stride(19) 17835 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17836 } 17837 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,k_eq_8_subtile)17838 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, k_eq_8_subtile) { 17839 TEST_REQUIRES_ARM_NEON; 17840 for (uint32_t n = 1; n <= 16; n++) { 17841 for (uint32_t m = 1; m <= 2; m++) { 17842 GemmMicrokernelTester() 17843 .mr(2) 17844 .nr(16) 17845 .kr(8) 17846 .sr(1) 17847 .m(m) 17848 .n(n) 17849 .k(8) 17850 .iterations(1) 17851 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 17852 } 17853 } 17854 } 17855 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,k_eq_8_subtile_m)17856 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, k_eq_8_subtile_m) { 17857 TEST_REQUIRES_ARM_NEON; 17858 for (uint32_t m = 1; m <= 2; m++) { 17859 GemmMicrokernelTester() 17860 .mr(2) 17861 .nr(16) 17862 .kr(8) 17863 .sr(1) 17864 .m(m) 17865 .n(16) 17866 .k(8) 17867 .iterations(1) 17868 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17869 } 17870 } 17871 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,k_eq_8_subtile_n)17872 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, k_eq_8_subtile_n) { 17873 TEST_REQUIRES_ARM_NEON; 17874 for (uint32_t n = 1; n <= 16; n++) { 17875 GemmMicrokernelTester() 17876 .mr(2) 17877 .nr(16) 17878 .kr(8) 17879 .sr(1) 17880 .m(2) 17881 .n(n) 17882 .k(8) 17883 .iterations(1) 17884 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17885 } 17886 } 17887 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,k_lt_8)17888 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, k_lt_8) { 17889 TEST_REQUIRES_ARM_NEON; 17890 for (size_t k = 1; k < 8; k++) { 17891 GemmMicrokernelTester() 17892 .mr(2) 17893 .nr(16) 17894 .kr(8) 17895 .sr(1) 17896 .m(2) 17897 .n(16) 17898 .k(k) 17899 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17900 } 17901 } 17902 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,k_lt_8_subtile)17903 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, k_lt_8_subtile) { 17904 TEST_REQUIRES_ARM_NEON; 17905 for (size_t k = 1; k < 8; k++) { 17906 for (uint32_t n = 1; n <= 16; n++) { 17907 for (uint32_t m = 1; m <= 2; m++) { 17908 GemmMicrokernelTester() 17909 .mr(2) 17910 .nr(16) 17911 .kr(8) 17912 .sr(1) 17913 .m(m) 17914 .n(n) 17915 .k(k) 17916 .iterations(1) 17917 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17918 } 17919 } 17920 } 17921 } 17922 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,k_gt_8)17923 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, k_gt_8) { 17924 TEST_REQUIRES_ARM_NEON; 17925 for (size_t k = 9; k < 16; k++) { 17926 GemmMicrokernelTester() 17927 .mr(2) 17928 .nr(16) 17929 .kr(8) 17930 .sr(1) 17931 .m(2) 17932 .n(16) 17933 .k(k) 17934 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17935 } 17936 } 17937 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,k_gt_8_subtile)17938 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, k_gt_8_subtile) { 17939 TEST_REQUIRES_ARM_NEON; 17940 for (size_t k = 9; k < 16; k++) { 17941 for (uint32_t n = 1; n <= 16; n++) { 17942 for (uint32_t m = 1; m <= 2; m++) { 17943 GemmMicrokernelTester() 17944 .mr(2) 17945 .nr(16) 17946 .kr(8) 17947 .sr(1) 17948 .m(m) 17949 .n(n) 17950 .k(k) 17951 .iterations(1) 17952 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17953 } 17954 } 17955 } 17956 } 17957 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,k_div_8)17958 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, k_div_8) { 17959 TEST_REQUIRES_ARM_NEON; 17960 for (size_t k = 16; k <= 80; k += 8) { 17961 GemmMicrokernelTester() 17962 .mr(2) 17963 .nr(16) 17964 .kr(8) 17965 .sr(1) 17966 .m(2) 17967 .n(16) 17968 .k(k) 17969 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17970 } 17971 } 17972 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,k_div_8_subtile)17973 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, k_div_8_subtile) { 17974 TEST_REQUIRES_ARM_NEON; 17975 for (size_t k = 16; k <= 80; k += 8) { 17976 for (uint32_t n = 1; n <= 16; n++) { 17977 for (uint32_t m = 1; m <= 2; m++) 
{ 17978 GemmMicrokernelTester() 17979 .mr(2) 17980 .nr(16) 17981 .kr(8) 17982 .sr(1) 17983 .m(m) 17984 .n(n) 17985 .k(k) 17986 .iterations(1) 17987 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17988 } 17989 } 17990 } 17991 } 17992 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,n_gt_16)17993 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, n_gt_16) { 17994 TEST_REQUIRES_ARM_NEON; 17995 for (uint32_t n = 17; n < 32; n++) { 17996 for (size_t k = 1; k <= 40; k += 9) { 17997 GemmMicrokernelTester() 17998 .mr(2) 17999 .nr(16) 18000 .kr(8) 18001 .sr(1) 18002 .m(2) 18003 .n(n) 18004 .k(k) 18005 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18006 } 18007 } 18008 } 18009 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,n_gt_16_strided_cn)18010 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, n_gt_16_strided_cn) { 18011 TEST_REQUIRES_ARM_NEON; 18012 for (uint32_t n = 17; n < 32; n++) { 18013 for (size_t k = 1; k <= 40; k += 9) { 18014 GemmMicrokernelTester() 18015 .mr(2) 18016 .nr(16) 18017 .kr(8) 18018 .sr(1) 18019 .m(2) 18020 .n(n) 18021 .k(k) 18022 .cn_stride(19) 18023 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18024 } 18025 } 18026 } 18027 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,n_gt_16_subtile)18028 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, n_gt_16_subtile) { 18029 TEST_REQUIRES_ARM_NEON; 18030 for (uint32_t n = 17; n < 32; n++) { 18031 for (size_t k = 1; k <= 40; k += 9) { 18032 for (uint32_t m = 1; m <= 2; m++) { 18033 GemmMicrokernelTester() 18034 .mr(2) 18035 .nr(16) 18036 .kr(8) 18037 .sr(1) 18038 .m(m) 18039 .n(n) 18040 .k(k) 18041 .iterations(1) 18042 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18043 } 18044 } 18045 } 
18046 } 18047 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,n_div_16)18048 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, n_div_16) { 18049 TEST_REQUIRES_ARM_NEON; 18050 for (uint32_t n = 32; n <= 48; n += 16) { 18051 for (size_t k = 1; k <= 40; k += 9) { 18052 GemmMicrokernelTester() 18053 .mr(2) 18054 .nr(16) 18055 .kr(8) 18056 .sr(1) 18057 .m(2) 18058 .n(n) 18059 .k(k) 18060 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18061 } 18062 } 18063 } 18064 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,n_div_16_strided_cn)18065 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, n_div_16_strided_cn) { 18066 TEST_REQUIRES_ARM_NEON; 18067 for (uint32_t n = 32; n <= 48; n += 16) { 18068 for (size_t k = 1; k <= 40; k += 9) { 18069 GemmMicrokernelTester() 18070 .mr(2) 18071 .nr(16) 18072 .kr(8) 18073 .sr(1) 18074 .m(2) 18075 .n(n) 18076 .k(k) 18077 .cn_stride(19) 18078 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18079 } 18080 } 18081 } 18082 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,n_div_16_subtile)18083 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, n_div_16_subtile) { 18084 TEST_REQUIRES_ARM_NEON; 18085 for (uint32_t n = 32; n <= 48; n += 16) { 18086 for (size_t k = 1; k <= 40; k += 9) { 18087 for (uint32_t m = 1; m <= 2; m++) { 18088 GemmMicrokernelTester() 18089 .mr(2) 18090 .nr(16) 18091 .kr(8) 18092 .sr(1) 18093 .m(m) 18094 .n(n) 18095 .k(k) 18096 .iterations(1) 18097 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18098 } 18099 } 18100 } 18101 } 18102 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,small_kernel)18103 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, small_kernel) { 18104 TEST_REQUIRES_ARM_NEON; 18105 for (size_t k = 1; k <= 40; k += 9) { 18106 GemmMicrokernelTester() 18107 .mr(2) 18108 .nr(16) 18109 .kr(8) 
18110 .sr(1) 18111 .m(2) 18112 .n(16) 18113 .k(k) 18114 .ks(3) 18115 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18116 } 18117 } 18118 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,small_kernel_subtile)18119 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, small_kernel_subtile) { 18120 TEST_REQUIRES_ARM_NEON; 18121 for (size_t k = 1; k <= 40; k += 9) { 18122 for (uint32_t n = 1; n <= 16; n++) { 18123 for (uint32_t m = 1; m <= 2; m++) { 18124 GemmMicrokernelTester() 18125 .mr(2) 18126 .nr(16) 18127 .kr(8) 18128 .sr(1) 18129 .m(m) 18130 .n(n) 18131 .k(k) 18132 .ks(3) 18133 .iterations(1) 18134 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18135 } 18136 } 18137 } 18138 } 18139 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,n_gt_16_small_kernel)18140 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, n_gt_16_small_kernel) { 18141 TEST_REQUIRES_ARM_NEON; 18142 for (uint32_t n = 17; n < 32; n++) { 18143 for (size_t k = 1; k <= 40; k += 9) { 18144 GemmMicrokernelTester() 18145 .mr(2) 18146 .nr(16) 18147 .kr(8) 18148 .sr(1) 18149 .m(2) 18150 .n(n) 18151 .k(k) 18152 .ks(3) 18153 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18154 } 18155 } 18156 } 18157 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,n_div_16_small_kernel)18158 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, n_div_16_small_kernel) { 18159 TEST_REQUIRES_ARM_NEON; 18160 for (uint32_t n = 32; n <= 48; n += 16) { 18161 for (size_t k = 1; k <= 40; k += 9) { 18162 GemmMicrokernelTester() 18163 .mr(2) 18164 .nr(16) 18165 .kr(8) 18166 .sr(1) 18167 .m(2) 18168 .n(n) 18169 .k(k) 18170 .ks(3) 18171 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18172 } 18173 } 18174 } 18175 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,strided_cm_subtile)18176 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, strided_cm_subtile) { 18177 TEST_REQUIRES_ARM_NEON; 18178 for (size_t k = 1; k <= 40; k += 9) { 18179 for (uint32_t n = 1; n <= 16; n++) { 18180 for (uint32_t m = 1; m <= 2; m++) { 18181 GemmMicrokernelTester() 18182 .mr(2) 18183 .nr(16) 18184 .kr(8) 18185 .sr(1) 18186 .m(m) 18187 .n(n) 18188 .k(k) 18189 .cm_stride(19) 18190 .iterations(1) 18191 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18192 } 18193 } 18194 } 18195 } 18196 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,a_offset)18197 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, a_offset) { 18198 TEST_REQUIRES_ARM_NEON; 18199 for (size_t k = 1; k <= 40; k += 9) { 18200 GemmMicrokernelTester() 18201 .mr(2) 18202 .nr(16) 18203 .kr(8) 18204 .sr(1) 18205 .m(2) 18206 .n(16) 18207 .k(k) 18208 .ks(3) 18209 .a_offset(83) 18210 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18211 } 18212 } 18213 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,zero)18214 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, zero) { 18215 TEST_REQUIRES_ARM_NEON; 18216 for (size_t k = 1; k <= 40; k += 9) { 18217 for (uint32_t mz = 0; mz < 2; mz++) { 18218 GemmMicrokernelTester() 18219 .mr(2) 18220 .nr(16) 18221 .kr(8) 18222 .sr(1) 18223 .m(2) 18224 .n(16) 18225 .k(k) 18226 .ks(3) 18227 .a_offset(83) 18228 .zero_index(mz) 18229 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18230 } 18231 } 18232 } 18233 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,qmin)18234 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, qmin) { 18235 TEST_REQUIRES_ARM_NEON; 18236 GemmMicrokernelTester() 18237 .mr(2) 18238 .nr(16) 18239 .kr(8) 18240 .sr(1) 18241 .m(2) 18242 .n(16) 18243 .k(8) 18244 .qmin(128) 18245 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18246 } 18247 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,qmax)18248 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, qmax) { 18249 TEST_REQUIRES_ARM_NEON; 18250 GemmMicrokernelTester() 18251 .mr(2) 18252 .nr(16) 18253 .kr(8) 18254 .sr(1) 18255 .m(2) 18256 .n(16) 18257 .k(8) 18258 .qmax(128) 18259 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18260 } 18261 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL,strided_cm)18262 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C8__NEON_MULL, strided_cm) { 18263 TEST_REQUIRES_ARM_NEON; 18264 GemmMicrokernelTester() 18265 .mr(2) 18266 .nr(16) 18267 .kr(8) 18268 .sr(1) 18269 .m(2) 18270 .n(16) 18271 .k(8) 18272 .cm_stride(19) 18273 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18274 } 18275 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 18276 18277 18278 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL,k_eq_8)18279 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, k_eq_8) { 18280 TEST_REQUIRES_ARM_NEON; 18281 GemmMicrokernelTester() 18282 .mr(4) 18283 .nr(16) 18284 .kr(8) 18285 .sr(1) 18286 .m(4) 18287 .n(16) 18288 .k(8) 18289 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18290 } 18291 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL,strided_cn)18292 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, strided_cn) { 18293 TEST_REQUIRES_ARM_NEON; 18294 GemmMicrokernelTester() 18295 .mr(4) 18296 .nr(16) 18297 .kr(8) 18298 .sr(1) 18299 .m(4) 18300 .n(16) 18301 .k(8) 18302 .cn_stride(19) 18303 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18304 } 18305 
// k = 8; sweeps n over [1, 16] and m over [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 8, n = 16; sweeps m over [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 8, m = 4; sweeps n over [1, 16], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// m = 4, n = 16; sweeps k over [1, 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over [1, 8), n over [1, 16] and m over [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 16; sweeps k over [9, 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over [9, 16), n over [1, 16] and m over [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 16; sweeps k over 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 16, 24, ..., 80, n over [1, 16] and m over [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4; sweeps n over [17, 32) and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over [17, 32), k over 1, 10, ..., 37 and m over [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4; sweeps n over 32, 48 and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over 32, 48, k over 1, 10, ..., 37 and m over [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 16, ks = 3; sweeps k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(8).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks = 3; sweeps k over 1, 10, ..., 37, n over [1, 16] and m over [1, 4], one iteration per combination.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, ks = 3; sweeps n over [17, 32) and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, n_div_16_small_kernel) { 18627 TEST_REQUIRES_ARM_NEON; 18628 for (uint32_t n = 32; n <= 48; n += 16) { 18629 for (size_t k = 1; k <= 40; k += 9) { 18630 GemmMicrokernelTester() 18631 .mr(4) 18632 .nr(16) 18633 .kr(8) 18634 .sr(1) 18635 .m(4) 18636 .n(n) 18637 .k(k) 18638 .ks(3) 18639 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18640 } 18641 } 18642 } 18643 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL,strided_cm_subtile)18644 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, strided_cm_subtile) { 18645 TEST_REQUIRES_ARM_NEON; 18646 for (size_t k = 1; k <= 40; k += 9) { 18647 for (uint32_t n = 1; n <= 16; n++) { 18648 for (uint32_t m = 1; m <= 4; m++) { 18649 GemmMicrokernelTester() 18650 .mr(4) 18651 .nr(16) 18652 .kr(8) 18653 .sr(1) 18654 .m(m) 18655 .n(n) 18656 .k(k) 18657 .cm_stride(19) 18658 .iterations(1) 18659 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18660 } 18661 } 18662 } 18663 } 18664 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL,a_offset)18665 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, a_offset) { 18666 TEST_REQUIRES_ARM_NEON; 18667 for (size_t k = 1; k <= 40; k += 9) { 18668 GemmMicrokernelTester() 18669 .mr(4) 18670 .nr(16) 18671 .kr(8) 18672 .sr(1) 18673 .m(4) 18674 .n(16) 18675 .k(k) 18676 .ks(3) 18677 .a_offset(163) 18678 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18679 } 18680 } 18681 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL,zero)18682 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, zero) { 18683 TEST_REQUIRES_ARM_NEON; 18684 for (size_t k = 1; k <= 40; k += 9) { 18685 for (uint32_t mz = 0; mz < 4; mz++) { 18686 GemmMicrokernelTester() 18687 .mr(4) 18688 .nr(16) 18689 .kr(8) 18690 .sr(1) 18691 .m(4) 18692 .n(16) 18693 .k(k) 18694 .ks(3) 
18695 .a_offset(163) 18696 .zero_index(mz) 18697 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18698 } 18699 } 18700 } 18701 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL,qmin)18702 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, qmin) { 18703 TEST_REQUIRES_ARM_NEON; 18704 GemmMicrokernelTester() 18705 .mr(4) 18706 .nr(16) 18707 .kr(8) 18708 .sr(1) 18709 .m(4) 18710 .n(16) 18711 .k(8) 18712 .qmin(128) 18713 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18714 } 18715 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL,qmax)18716 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, qmax) { 18717 TEST_REQUIRES_ARM_NEON; 18718 GemmMicrokernelTester() 18719 .mr(4) 18720 .nr(16) 18721 .kr(8) 18722 .sr(1) 18723 .m(4) 18724 .n(16) 18725 .k(8) 18726 .qmax(128) 18727 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18728 } 18729 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL,strided_cm)18730 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C8__NEON_MULL, strided_cm) { 18731 TEST_REQUIRES_ARM_NEON; 18732 GemmMicrokernelTester() 18733 .mr(4) 18734 .nr(16) 18735 .kr(8) 18736 .sr(1) 18737 .m(4) 18738 .n(16) 18739 .k(8) 18740 .cm_stride(19) 18741 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18742 } 18743 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 18744 18745 18746 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,k_eq_16)18747 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_eq_16) { 18748 TEST_REQUIRES_ARM_NEON; 18749 GemmMicrokernelTester() 18750 .mr(3) 18751 .nr(16) 18752 .kr(16) 18753 .sr(1) 18754 .m(3) 18755 .n(16) 18756 .k(16) 18757 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 18758 } 18759 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,strided_cn)18760 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, strided_cn) { 18761 TEST_REQUIRES_ARM_NEON; 18762 GemmMicrokernelTester() 18763 .mr(3) 18764 .nr(16) 18765 .kr(16) 18766 .sr(1) 18767 .m(3) 18768 .n(16) 18769 .k(16) 18770 .cn_stride(19) 18771 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18772 } 18773 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,k_eq_16_subtile)18774 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_eq_16_subtile) { 18775 TEST_REQUIRES_ARM_NEON; 18776 for (uint32_t n = 1; n <= 16; n++) { 18777 for (uint32_t m = 1; m <= 3; m++) { 18778 GemmMicrokernelTester() 18779 .mr(3) 18780 .nr(16) 18781 .kr(16) 18782 .sr(1) 18783 .m(m) 18784 .n(n) 18785 .k(16) 18786 .iterations(1) 18787 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18788 } 18789 } 18790 } 18791 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,k_eq_16_subtile_m)18792 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_eq_16_subtile_m) { 18793 TEST_REQUIRES_ARM_NEON; 18794 for (uint32_t m = 1; m <= 3; m++) { 18795 GemmMicrokernelTester() 18796 .mr(3) 18797 .nr(16) 18798 .kr(16) 18799 .sr(1) 18800 .m(m) 18801 .n(16) 18802 .k(16) 18803 .iterations(1) 18804 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18805 } 18806 } 18807 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,k_eq_16_subtile_n)18808 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_eq_16_subtile_n) { 18809 TEST_REQUIRES_ARM_NEON; 18810 for (uint32_t n = 1; n <= 16; n++) { 18811 GemmMicrokernelTester() 18812 .mr(3) 18813 .nr(16) 18814 .kr(16) 18815 .sr(1) 18816 .m(3) 18817 .n(n) 18818 .k(16) 18819 .iterations(1) 18820 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18821 } 18822 } 18823 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,k_lt_16)18824 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_lt_16) { 18825 TEST_REQUIRES_ARM_NEON; 18826 for (size_t k = 1; k < 16; k++) { 18827 GemmMicrokernelTester() 18828 .mr(3) 18829 .nr(16) 18830 .kr(16) 18831 .sr(1) 18832 .m(3) 18833 .n(16) 18834 .k(k) 18835 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18836 } 18837 } 18838 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,k_lt_16_subtile)18839 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_lt_16_subtile) { 18840 TEST_REQUIRES_ARM_NEON; 18841 for (size_t k = 1; k < 16; k++) { 18842 for (uint32_t n = 1; n <= 16; n++) { 18843 for (uint32_t m = 1; m <= 3; m++) { 18844 GemmMicrokernelTester() 18845 .mr(3) 18846 .nr(16) 18847 .kr(16) 18848 .sr(1) 18849 .m(m) 18850 .n(n) 18851 .k(k) 18852 .iterations(1) 18853 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18854 } 18855 } 18856 } 18857 } 18858 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,k_gt_16)18859 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_gt_16) { 18860 TEST_REQUIRES_ARM_NEON; 18861 for (size_t k = 17; k < 32; k++) { 18862 GemmMicrokernelTester() 18863 .mr(3) 18864 .nr(16) 18865 .kr(16) 18866 .sr(1) 18867 .m(3) 18868 .n(16) 18869 .k(k) 18870 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18871 } 18872 } 18873 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,k_gt_16_subtile)18874 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_gt_16_subtile) { 18875 TEST_REQUIRES_ARM_NEON; 18876 for (size_t k = 17; k < 32; k++) { 18877 for (uint32_t n = 1; n <= 16; n++) { 18878 for (uint32_t m = 1; m <= 3; m++) { 18879 GemmMicrokernelTester() 18880 .mr(3) 18881 .nr(16) 18882 
.kr(16) 18883 .sr(1) 18884 .m(m) 18885 .n(n) 18886 .k(k) 18887 .iterations(1) 18888 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18889 } 18890 } 18891 } 18892 } 18893 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,k_div_16)18894 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_div_16) { 18895 TEST_REQUIRES_ARM_NEON; 18896 for (size_t k = 32; k <= 160; k += 16) { 18897 GemmMicrokernelTester() 18898 .mr(3) 18899 .nr(16) 18900 .kr(16) 18901 .sr(1) 18902 .m(3) 18903 .n(16) 18904 .k(k) 18905 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18906 } 18907 } 18908 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,k_div_16_subtile)18909 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, k_div_16_subtile) { 18910 TEST_REQUIRES_ARM_NEON; 18911 for (size_t k = 32; k <= 160; k += 16) { 18912 for (uint32_t n = 1; n <= 16; n++) { 18913 for (uint32_t m = 1; m <= 3; m++) { 18914 GemmMicrokernelTester() 18915 .mr(3) 18916 .nr(16) 18917 .kr(16) 18918 .sr(1) 18919 .m(m) 18920 .n(n) 18921 .k(k) 18922 .iterations(1) 18923 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18924 } 18925 } 18926 } 18927 } 18928 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,n_gt_16)18929 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_gt_16) { 18930 TEST_REQUIRES_ARM_NEON; 18931 for (uint32_t n = 17; n < 32; n++) { 18932 for (size_t k = 1; k <= 80; k += 17) { 18933 GemmMicrokernelTester() 18934 .mr(3) 18935 .nr(16) 18936 .kr(16) 18937 .sr(1) 18938 .m(3) 18939 .n(n) 18940 .k(k) 18941 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18942 } 18943 } 18944 } 18945 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,n_gt_16_strided_cn)18946 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, 
n_gt_16_strided_cn) { 18947 TEST_REQUIRES_ARM_NEON; 18948 for (uint32_t n = 17; n < 32; n++) { 18949 for (size_t k = 1; k <= 80; k += 17) { 18950 GemmMicrokernelTester() 18951 .mr(3) 18952 .nr(16) 18953 .kr(16) 18954 .sr(1) 18955 .m(3) 18956 .n(n) 18957 .k(k) 18958 .cn_stride(19) 18959 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18960 } 18961 } 18962 } 18963 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,n_gt_16_subtile)18964 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_gt_16_subtile) { 18965 TEST_REQUIRES_ARM_NEON; 18966 for (uint32_t n = 17; n < 32; n++) { 18967 for (size_t k = 1; k <= 80; k += 17) { 18968 for (uint32_t m = 1; m <= 3; m++) { 18969 GemmMicrokernelTester() 18970 .mr(3) 18971 .nr(16) 18972 .kr(16) 18973 .sr(1) 18974 .m(m) 18975 .n(n) 18976 .k(k) 18977 .iterations(1) 18978 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18979 } 18980 } 18981 } 18982 } 18983 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,n_div_16)18984 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_div_16) { 18985 TEST_REQUIRES_ARM_NEON; 18986 for (uint32_t n = 32; n <= 48; n += 16) { 18987 for (size_t k = 1; k <= 80; k += 17) { 18988 GemmMicrokernelTester() 18989 .mr(3) 18990 .nr(16) 18991 .kr(16) 18992 .sr(1) 18993 .m(3) 18994 .n(n) 18995 .k(k) 18996 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18997 } 18998 } 18999 } 19000 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,n_div_16_strided_cn)19001 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_div_16_strided_cn) { 19002 TEST_REQUIRES_ARM_NEON; 19003 for (uint32_t n = 32; n <= 48; n += 16) { 19004 for (size_t k = 1; k <= 80; k += 17) { 19005 GemmMicrokernelTester() 19006 .mr(3) 19007 .nr(16) 19008 .kr(16) 19009 .sr(1) 19010 .m(3) 19011 .n(n) 19012 .k(k) 19013 
.cn_stride(19) 19014 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19015 } 19016 } 19017 } 19018 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,n_div_16_subtile)19019 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_div_16_subtile) { 19020 TEST_REQUIRES_ARM_NEON; 19021 for (uint32_t n = 32; n <= 48; n += 16) { 19022 for (size_t k = 1; k <= 80; k += 17) { 19023 for (uint32_t m = 1; m <= 3; m++) { 19024 GemmMicrokernelTester() 19025 .mr(3) 19026 .nr(16) 19027 .kr(16) 19028 .sr(1) 19029 .m(m) 19030 .n(n) 19031 .k(k) 19032 .iterations(1) 19033 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19034 } 19035 } 19036 } 19037 } 19038 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,small_kernel)19039 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, small_kernel) { 19040 TEST_REQUIRES_ARM_NEON; 19041 for (size_t k = 1; k <= 80; k += 17) { 19042 GemmMicrokernelTester() 19043 .mr(3) 19044 .nr(16) 19045 .kr(16) 19046 .sr(1) 19047 .m(3) 19048 .n(16) 19049 .k(k) 19050 .ks(3) 19051 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19052 } 19053 } 19054 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,small_kernel_subtile)19055 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, small_kernel_subtile) { 19056 TEST_REQUIRES_ARM_NEON; 19057 for (size_t k = 1; k <= 80; k += 17) { 19058 for (uint32_t n = 1; n <= 16; n++) { 19059 for (uint32_t m = 1; m <= 3; m++) { 19060 GemmMicrokernelTester() 19061 .mr(3) 19062 .nr(16) 19063 .kr(16) 19064 .sr(1) 19065 .m(m) 19066 .n(n) 19067 .k(k) 19068 .ks(3) 19069 .iterations(1) 19070 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19071 } 19072 } 19073 } 19074 } 19075 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,n_gt_16_small_kernel)19076 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_gt_16_small_kernel) { 19077 TEST_REQUIRES_ARM_NEON; 19078 for (uint32_t n = 17; n < 32; n++) { 19079 for (size_t k = 1; k <= 80; k += 17) { 19080 GemmMicrokernelTester() 19081 .mr(3) 19082 .nr(16) 19083 .kr(16) 19084 .sr(1) 19085 .m(3) 19086 .n(n) 19087 .k(k) 19088 .ks(3) 19089 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19090 } 19091 } 19092 } 19093 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,n_div_16_small_kernel)19094 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, n_div_16_small_kernel) { 19095 TEST_REQUIRES_ARM_NEON; 19096 for (uint32_t n = 32; n <= 48; n += 16) { 19097 for (size_t k = 1; k <= 80; k += 17) { 19098 GemmMicrokernelTester() 19099 .mr(3) 19100 .nr(16) 19101 .kr(16) 19102 .sr(1) 19103 .m(3) 19104 .n(n) 19105 .k(k) 19106 .ks(3) 19107 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19108 } 19109 } 19110 } 19111 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,strided_cm_subtile)19112 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, strided_cm_subtile) { 19113 TEST_REQUIRES_ARM_NEON; 19114 for (size_t k = 1; k <= 80; k += 17) { 19115 for (uint32_t n = 1; n <= 16; n++) { 19116 for (uint32_t m = 1; m <= 3; m++) { 19117 GemmMicrokernelTester() 19118 .mr(3) 19119 .nr(16) 19120 .kr(16) 19121 .sr(1) 19122 .m(m) 19123 .n(n) 19124 .k(k) 19125 .cm_stride(19) 19126 .iterations(1) 19127 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19128 } 19129 } 19130 } 19131 } 19132 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,a_offset)19133 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, a_offset) { 19134 TEST_REQUIRES_ARM_NEON; 19135 for (size_t k = 1; k <= 80; k += 17) { 19136 
GemmMicrokernelTester() 19137 .mr(3) 19138 .nr(16) 19139 .kr(16) 19140 .sr(1) 19141 .m(3) 19142 .n(16) 19143 .k(k) 19144 .ks(3) 19145 .a_offset(251) 19146 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19147 } 19148 } 19149 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,zero)19150 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, zero) { 19151 TEST_REQUIRES_ARM_NEON; 19152 for (size_t k = 1; k <= 80; k += 17) { 19153 for (uint32_t mz = 0; mz < 3; mz++) { 19154 GemmMicrokernelTester() 19155 .mr(3) 19156 .nr(16) 19157 .kr(16) 19158 .sr(1) 19159 .m(3) 19160 .n(16) 19161 .k(k) 19162 .ks(3) 19163 .a_offset(251) 19164 .zero_index(mz) 19165 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19166 } 19167 } 19168 } 19169 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,qmin)19170 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, qmin) { 19171 TEST_REQUIRES_ARM_NEON; 19172 GemmMicrokernelTester() 19173 .mr(3) 19174 .nr(16) 19175 .kr(16) 19176 .sr(1) 19177 .m(3) 19178 .n(16) 19179 .k(16) 19180 .qmin(128) 19181 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19182 } 19183 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,qmax)19184 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, qmax) { 19185 TEST_REQUIRES_ARM_NEON; 19186 GemmMicrokernelTester() 19187 .mr(3) 19188 .nr(16) 19189 .kr(16) 19190 .sr(1) 19191 .m(3) 19192 .n(16) 19193 .k(16) 19194 .qmax(128) 19195 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19196 } 19197 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL,strided_cm)19198 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C16__NEON_MLAL, strided_cm) { 19199 TEST_REQUIRES_ARM_NEON; 19200 GemmMicrokernelTester() 19201 .mr(3) 19202 .nr(16) 19203 .kr(16) 19204 .sr(1) 
19205 .m(3) 19206 .n(16) 19207 .k(16) 19208 .cm_stride(19) 19209 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19210 } 19211 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 19212 19213 19214 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,k_eq_16)19215 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_eq_16) { 19216 TEST_REQUIRES_ARM_NEON; 19217 GemmMicrokernelTester() 19218 .mr(4) 19219 .nr(16) 19220 .kr(16) 19221 .sr(1) 19222 .m(4) 19223 .n(16) 19224 .k(16) 19225 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19226 } 19227 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,strided_cn)19228 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, strided_cn) { 19229 TEST_REQUIRES_ARM_NEON; 19230 GemmMicrokernelTester() 19231 .mr(4) 19232 .nr(16) 19233 .kr(16) 19234 .sr(1) 19235 .m(4) 19236 .n(16) 19237 .k(16) 19238 .cn_stride(19) 19239 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19240 } 19241 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,k_eq_16_subtile)19242 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_eq_16_subtile) { 19243 TEST_REQUIRES_ARM_NEON; 19244 for (uint32_t n = 1; n <= 16; n++) { 19245 for (uint32_t m = 1; m <= 4; m++) { 19246 GemmMicrokernelTester() 19247 .mr(4) 19248 .nr(16) 19249 .kr(16) 19250 .sr(1) 19251 .m(m) 19252 .n(n) 19253 .k(16) 19254 .iterations(1) 19255 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19256 } 19257 } 19258 } 19259 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,k_eq_16_subtile_m)19260 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_eq_16_subtile_m) { 19261 TEST_REQUIRES_ARM_NEON; 19262 for (uint32_t m = 1; m <= 4; m++) { 19263 GemmMicrokernelTester() 19264 .mr(4) 
19265 .nr(16) 19266 .kr(16) 19267 .sr(1) 19268 .m(m) 19269 .n(16) 19270 .k(16) 19271 .iterations(1) 19272 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19273 } 19274 } 19275 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,k_eq_16_subtile_n)19276 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_eq_16_subtile_n) { 19277 TEST_REQUIRES_ARM_NEON; 19278 for (uint32_t n = 1; n <= 16; n++) { 19279 GemmMicrokernelTester() 19280 .mr(4) 19281 .nr(16) 19282 .kr(16) 19283 .sr(1) 19284 .m(4) 19285 .n(n) 19286 .k(16) 19287 .iterations(1) 19288 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19289 } 19290 } 19291 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,k_lt_16)19292 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_lt_16) { 19293 TEST_REQUIRES_ARM_NEON; 19294 for (size_t k = 1; k < 16; k++) { 19295 GemmMicrokernelTester() 19296 .mr(4) 19297 .nr(16) 19298 .kr(16) 19299 .sr(1) 19300 .m(4) 19301 .n(16) 19302 .k(k) 19303 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19304 } 19305 } 19306 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,k_lt_16_subtile)19307 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_lt_16_subtile) { 19308 TEST_REQUIRES_ARM_NEON; 19309 for (size_t k = 1; k < 16; k++) { 19310 for (uint32_t n = 1; n <= 16; n++) { 19311 for (uint32_t m = 1; m <= 4; m++) { 19312 GemmMicrokernelTester() 19313 .mr(4) 19314 .nr(16) 19315 .kr(16) 19316 .sr(1) 19317 .m(m) 19318 .n(n) 19319 .k(k) 19320 .iterations(1) 19321 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19322 } 19323 } 19324 } 19325 } 19326 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,k_gt_16)19327 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_gt_16) { 19328 
TEST_REQUIRES_ARM_NEON; 19329 for (size_t k = 17; k < 32; k++) { 19330 GemmMicrokernelTester() 19331 .mr(4) 19332 .nr(16) 19333 .kr(16) 19334 .sr(1) 19335 .m(4) 19336 .n(16) 19337 .k(k) 19338 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19339 } 19340 } 19341 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,k_gt_16_subtile)19342 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_gt_16_subtile) { 19343 TEST_REQUIRES_ARM_NEON; 19344 for (size_t k = 17; k < 32; k++) { 19345 for (uint32_t n = 1; n <= 16; n++) { 19346 for (uint32_t m = 1; m <= 4; m++) { 19347 GemmMicrokernelTester() 19348 .mr(4) 19349 .nr(16) 19350 .kr(16) 19351 .sr(1) 19352 .m(m) 19353 .n(n) 19354 .k(k) 19355 .iterations(1) 19356 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19357 } 19358 } 19359 } 19360 } 19361 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,k_div_16)19362 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_div_16) { 19363 TEST_REQUIRES_ARM_NEON; 19364 for (size_t k = 32; k <= 160; k += 16) { 19365 GemmMicrokernelTester() 19366 .mr(4) 19367 .nr(16) 19368 .kr(16) 19369 .sr(1) 19370 .m(4) 19371 .n(16) 19372 .k(k) 19373 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19374 } 19375 } 19376 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,k_div_16_subtile)19377 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_div_16_subtile) { 19378 TEST_REQUIRES_ARM_NEON; 19379 for (size_t k = 32; k <= 160; k += 16) { 19380 for (uint32_t n = 1; n <= 16; n++) { 19381 for (uint32_t m = 1; m <= 4; m++) { 19382 GemmMicrokernelTester() 19383 .mr(4) 19384 .nr(16) 19385 .kr(16) 19386 .sr(1) 19387 .m(m) 19388 .n(n) 19389 .k(k) 19390 .iterations(1) 19391 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 19392 } 19393 } 19394 } 19395 } 19396 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,n_gt_16)19397 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_gt_16) { 19398 TEST_REQUIRES_ARM_NEON; 19399 for (uint32_t n = 17; n < 32; n++) { 19400 for (size_t k = 1; k <= 80; k += 17) { 19401 GemmMicrokernelTester() 19402 .mr(4) 19403 .nr(16) 19404 .kr(16) 19405 .sr(1) 19406 .m(4) 19407 .n(n) 19408 .k(k) 19409 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19410 } 19411 } 19412 } 19413 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,n_gt_16_strided_cn)19414 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_gt_16_strided_cn) { 19415 TEST_REQUIRES_ARM_NEON; 19416 for (uint32_t n = 17; n < 32; n++) { 19417 for (size_t k = 1; k <= 80; k += 17) { 19418 GemmMicrokernelTester() 19419 .mr(4) 19420 .nr(16) 19421 .kr(16) 19422 .sr(1) 19423 .m(4) 19424 .n(n) 19425 .k(k) 19426 .cn_stride(19) 19427 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19428 } 19429 } 19430 } 19431 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,n_gt_16_subtile)19432 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_gt_16_subtile) { 19433 TEST_REQUIRES_ARM_NEON; 19434 for (uint32_t n = 17; n < 32; n++) { 19435 for (size_t k = 1; k <= 80; k += 17) { 19436 for (uint32_t m = 1; m <= 4; m++) { 19437 GemmMicrokernelTester() 19438 .mr(4) 19439 .nr(16) 19440 .kr(16) 19441 .sr(1) 19442 .m(m) 19443 .n(n) 19444 .k(k) 19445 .iterations(1) 19446 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19447 } 19448 } 19449 } 19450 } 19451 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,n_div_16)19452 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_div_16) { 19453 TEST_REQUIRES_ARM_NEON; 19454 for (uint32_t n = 32; n <= 48; n += 16) { 19455 for (size_t k = 1; k 
<= 80; k += 17) { 19456 GemmMicrokernelTester() 19457 .mr(4) 19458 .nr(16) 19459 .kr(16) 19460 .sr(1) 19461 .m(4) 19462 .n(n) 19463 .k(k) 19464 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19465 } 19466 } 19467 } 19468 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,n_div_16_strided_cn)19469 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_div_16_strided_cn) { 19470 TEST_REQUIRES_ARM_NEON; 19471 for (uint32_t n = 32; n <= 48; n += 16) { 19472 for (size_t k = 1; k <= 80; k += 17) { 19473 GemmMicrokernelTester() 19474 .mr(4) 19475 .nr(16) 19476 .kr(16) 19477 .sr(1) 19478 .m(4) 19479 .n(n) 19480 .k(k) 19481 .cn_stride(19) 19482 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19483 } 19484 } 19485 } 19486 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,n_div_16_subtile)19487 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_div_16_subtile) { 19488 TEST_REQUIRES_ARM_NEON; 19489 for (uint32_t n = 32; n <= 48; n += 16) { 19490 for (size_t k = 1; k <= 80; k += 17) { 19491 for (uint32_t m = 1; m <= 4; m++) { 19492 GemmMicrokernelTester() 19493 .mr(4) 19494 .nr(16) 19495 .kr(16) 19496 .sr(1) 19497 .m(m) 19498 .n(n) 19499 .k(k) 19500 .iterations(1) 19501 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19502 } 19503 } 19504 } 19505 } 19506 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,small_kernel)19507 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, small_kernel) { 19508 TEST_REQUIRES_ARM_NEON; 19509 for (size_t k = 1; k <= 80; k += 17) { 19510 GemmMicrokernelTester() 19511 .mr(4) 19512 .nr(16) 19513 .kr(16) 19514 .sr(1) 19515 .m(4) 19516 .n(16) 19517 .k(k) 19518 .ks(3) 19519 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19520 } 19521 } 
19522 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,small_kernel_subtile)19523 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, small_kernel_subtile) { 19524 TEST_REQUIRES_ARM_NEON; 19525 for (size_t k = 1; k <= 80; k += 17) { 19526 for (uint32_t n = 1; n <= 16; n++) { 19527 for (uint32_t m = 1; m <= 4; m++) { 19528 GemmMicrokernelTester() 19529 .mr(4) 19530 .nr(16) 19531 .kr(16) 19532 .sr(1) 19533 .m(m) 19534 .n(n) 19535 .k(k) 19536 .ks(3) 19537 .iterations(1) 19538 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19539 } 19540 } 19541 } 19542 } 19543 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,n_gt_16_small_kernel)19544 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_gt_16_small_kernel) { 19545 TEST_REQUIRES_ARM_NEON; 19546 for (uint32_t n = 17; n < 32; n++) { 19547 for (size_t k = 1; k <= 80; k += 17) { 19548 GemmMicrokernelTester() 19549 .mr(4) 19550 .nr(16) 19551 .kr(16) 19552 .sr(1) 19553 .m(4) 19554 .n(n) 19555 .k(k) 19556 .ks(3) 19557 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19558 } 19559 } 19560 } 19561 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,n_div_16_small_kernel)19562 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_div_16_small_kernel) { 19563 TEST_REQUIRES_ARM_NEON; 19564 for (uint32_t n = 32; n <= 48; n += 16) { 19565 for (size_t k = 1; k <= 80; k += 17) { 19566 GemmMicrokernelTester() 19567 .mr(4) 19568 .nr(16) 19569 .kr(16) 19570 .sr(1) 19571 .m(4) 19572 .n(n) 19573 .k(k) 19574 .ks(3) 19575 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19576 } 19577 } 19578 } 19579 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,strided_cm_subtile)19580 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, strided_cm_subtile) { 19581 TEST_REQUIRES_ARM_NEON; 19582 for (size_t k = 1; k <= 80; k += 17) { 
19583 for (uint32_t n = 1; n <= 16; n++) { 19584 for (uint32_t m = 1; m <= 4; m++) { 19585 GemmMicrokernelTester() 19586 .mr(4) 19587 .nr(16) 19588 .kr(16) 19589 .sr(1) 19590 .m(m) 19591 .n(n) 19592 .k(k) 19593 .cm_stride(19) 19594 .iterations(1) 19595 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19596 } 19597 } 19598 } 19599 } 19600 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,a_offset)19601 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, a_offset) { 19602 TEST_REQUIRES_ARM_NEON; 19603 for (size_t k = 1; k <= 80; k += 17) { 19604 GemmMicrokernelTester() 19605 .mr(4) 19606 .nr(16) 19607 .kr(16) 19608 .sr(1) 19609 .m(4) 19610 .n(16) 19611 .k(k) 19612 .ks(3) 19613 .a_offset(331) 19614 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19615 } 19616 } 19617 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,zero)19618 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, zero) { 19619 TEST_REQUIRES_ARM_NEON; 19620 for (size_t k = 1; k <= 80; k += 17) { 19621 for (uint32_t mz = 0; mz < 4; mz++) { 19622 GemmMicrokernelTester() 19623 .mr(4) 19624 .nr(16) 19625 .kr(16) 19626 .sr(1) 19627 .m(4) 19628 .n(16) 19629 .k(k) 19630 .ks(3) 19631 .a_offset(331) 19632 .zero_index(mz) 19633 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19634 } 19635 } 19636 } 19637 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,qmin)19638 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, qmin) { 19639 TEST_REQUIRES_ARM_NEON; 19640 GemmMicrokernelTester() 19641 .mr(4) 19642 .nr(16) 19643 .kr(16) 19644 .sr(1) 19645 .m(4) 19646 .n(16) 19647 .k(16) 19648 .qmin(128) 19649 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19650 } 19651 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,qmax)19652 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, qmax) { 19653 TEST_REQUIRES_ARM_NEON; 19654 GemmMicrokernelTester() 19655 .mr(4) 19656 .nr(16) 19657 .kr(16) 19658 .sr(1) 19659 .m(4) 19660 .n(16) 19661 .k(16) 19662 .qmax(128) 19663 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19664 } 19665 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL,strided_cm)19666 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, strided_cm) { 19667 TEST_REQUIRES_ARM_NEON; 19668 GemmMicrokernelTester() 19669 .mr(4) 19670 .nr(16) 19671 .kr(16) 19672 .sr(1) 19673 .m(4) 19674 .n(16) 19675 .k(16) 19676 .cm_stride(19) 19677 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19678 } 19679 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 19680 19681 19682 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_eq_8)19683 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8) { 19684 TEST_REQUIRES_ARM_NEON_DOT; 19685 GemmMicrokernelTester() 19686 .mr(6) 19687 .nr(16) 19688 .kr(4) 19689 .sr(1) 19690 .m(6) 19691 .n(16) 19692 .k(8) 19693 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19694 } 19695 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,strided_cn)19696 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cn) { 19697 TEST_REQUIRES_ARM_NEON_DOT; 19698 GemmMicrokernelTester() 19699 .mr(6) 19700 .nr(16) 19701 .kr(4) 19702 .sr(1) 19703 .m(6) 19704 .n(16) 19705 .k(8) 19706 .cn_stride(19) 19707 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19708 } 19709 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_eq_8_subtile)19710 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile) { 19711 
TEST_REQUIRES_ARM_NEON_DOT; 19712 for (uint32_t n = 1; n <= 16; n++) { 19713 for (uint32_t m = 1; m <= 6; m++) { 19714 GemmMicrokernelTester() 19715 .mr(6) 19716 .nr(16) 19717 .kr(4) 19718 .sr(1) 19719 .m(m) 19720 .n(n) 19721 .k(8) 19722 .iterations(1) 19723 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19724 } 19725 } 19726 } 19727 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_eq_8_subtile_m)19728 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_m) { 19729 TEST_REQUIRES_ARM_NEON_DOT; 19730 for (uint32_t m = 1; m <= 6; m++) { 19731 GemmMicrokernelTester() 19732 .mr(6) 19733 .nr(16) 19734 .kr(4) 19735 .sr(1) 19736 .m(m) 19737 .n(16) 19738 .k(8) 19739 .iterations(1) 19740 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19741 } 19742 } 19743 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_eq_8_subtile_n)19744 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_n) { 19745 TEST_REQUIRES_ARM_NEON_DOT; 19746 for (uint32_t n = 1; n <= 16; n++) { 19747 GemmMicrokernelTester() 19748 .mr(6) 19749 .nr(16) 19750 .kr(4) 19751 .sr(1) 19752 .m(6) 19753 .n(n) 19754 .k(8) 19755 .iterations(1) 19756 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19757 } 19758 } 19759 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_lt_8)19760 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8) { 19761 TEST_REQUIRES_ARM_NEON_DOT; 19762 for (size_t k = 1; k < 8; k++) { 19763 GemmMicrokernelTester() 19764 .mr(6) 19765 .nr(16) 19766 .kr(4) 19767 .sr(1) 19768 .m(6) 19769 .n(16) 19770 .k(k) 19771 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19772 } 19773 } 19774 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_lt_8_subtile)19775 
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8_subtile) { 19776 TEST_REQUIRES_ARM_NEON_DOT; 19777 for (size_t k = 1; k < 8; k++) { 19778 for (uint32_t n = 1; n <= 16; n++) { 19779 for (uint32_t m = 1; m <= 6; m++) { 19780 GemmMicrokernelTester() 19781 .mr(6) 19782 .nr(16) 19783 .kr(4) 19784 .sr(1) 19785 .m(m) 19786 .n(n) 19787 .k(k) 19788 .iterations(1) 19789 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19790 } 19791 } 19792 } 19793 } 19794 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_gt_8)19795 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8) { 19796 TEST_REQUIRES_ARM_NEON_DOT; 19797 for (size_t k = 9; k < 16; k++) { 19798 GemmMicrokernelTester() 19799 .mr(6) 19800 .nr(16) 19801 .kr(4) 19802 .sr(1) 19803 .m(6) 19804 .n(16) 19805 .k(k) 19806 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19807 } 19808 } 19809 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_gt_8_subtile)19810 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8_subtile) { 19811 TEST_REQUIRES_ARM_NEON_DOT; 19812 for (size_t k = 9; k < 16; k++) { 19813 for (uint32_t n = 1; n <= 16; n++) { 19814 for (uint32_t m = 1; m <= 6; m++) { 19815 GemmMicrokernelTester() 19816 .mr(6) 19817 .nr(16) 19818 .kr(4) 19819 .sr(1) 19820 .m(m) 19821 .n(n) 19822 .k(k) 19823 .iterations(1) 19824 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19825 } 19826 } 19827 } 19828 } 19829 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_div_8)19830 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8) { 19831 TEST_REQUIRES_ARM_NEON_DOT; 19832 for (size_t k = 16; k <= 80; k += 8) { 19833 GemmMicrokernelTester() 19834 .mr(6) 19835 .nr(16) 19836 .kr(4) 19837 .sr(1) 19838 .m(6) 19839 .n(16) 19840 .k(k) 19841 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19842 } 19843 } 19844 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,k_div_8_subtile)19845 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8_subtile) { 19846 TEST_REQUIRES_ARM_NEON_DOT; 19847 for (size_t k = 16; k <= 80; k += 8) { 19848 for (uint32_t n = 1; n <= 16; n++) { 19849 for (uint32_t m = 1; m <= 6; m++) { 19850 GemmMicrokernelTester() 19851 .mr(6) 19852 .nr(16) 19853 .kr(4) 19854 .sr(1) 19855 .m(m) 19856 .n(n) 19857 .k(k) 19858 .iterations(1) 19859 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19860 } 19861 } 19862 } 19863 } 19864 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_gt_16)19865 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16) { 19866 TEST_REQUIRES_ARM_NEON_DOT; 19867 for (uint32_t n = 17; n < 32; n++) { 19868 for (size_t k = 1; k <= 40; k += 9) { 19869 GemmMicrokernelTester() 19870 .mr(6) 19871 .nr(16) 19872 .kr(4) 19873 .sr(1) 19874 .m(6) 19875 .n(n) 19876 .k(k) 19877 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19878 } 19879 } 19880 } 19881 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_gt_16_strided_cn)19882 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_strided_cn) { 19883 TEST_REQUIRES_ARM_NEON_DOT; 19884 for (uint32_t n = 17; n < 32; n++) { 19885 for (size_t k = 1; k <= 40; k += 9) { 19886 GemmMicrokernelTester() 19887 .mr(6) 19888 .nr(16) 19889 .kr(4) 19890 .sr(1) 19891 .m(6) 19892 .n(n) 19893 .k(k) 19894 .cn_stride(19) 19895 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19896 } 19897 } 19898 } 19899 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_gt_16_subtile)19900 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_subtile) { 19901 TEST_REQUIRES_ARM_NEON_DOT; 19902 for (uint32_t n = 17; n < 32; n++) { 19903 for 
(size_t k = 1; k <= 40; k += 9) { 19904 for (uint32_t m = 1; m <= 6; m++) { 19905 GemmMicrokernelTester() 19906 .mr(6) 19907 .nr(16) 19908 .kr(4) 19909 .sr(1) 19910 .m(m) 19911 .n(n) 19912 .k(k) 19913 .iterations(1) 19914 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19915 } 19916 } 19917 } 19918 } 19919 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_div_16)19920 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16) { 19921 TEST_REQUIRES_ARM_NEON_DOT; 19922 for (uint32_t n = 32; n <= 48; n += 16) { 19923 for (size_t k = 1; k <= 40; k += 9) { 19924 GemmMicrokernelTester() 19925 .mr(6) 19926 .nr(16) 19927 .kr(4) 19928 .sr(1) 19929 .m(6) 19930 .n(n) 19931 .k(k) 19932 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19933 } 19934 } 19935 } 19936 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_div_16_strided_cn)19937 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_strided_cn) { 19938 TEST_REQUIRES_ARM_NEON_DOT; 19939 for (uint32_t n = 32; n <= 48; n += 16) { 19940 for (size_t k = 1; k <= 40; k += 9) { 19941 GemmMicrokernelTester() 19942 .mr(6) 19943 .nr(16) 19944 .kr(4) 19945 .sr(1) 19946 .m(6) 19947 .n(n) 19948 .k(k) 19949 .cn_stride(19) 19950 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19951 } 19952 } 19953 } 19954 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_div_16_subtile)19955 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_subtile) { 19956 TEST_REQUIRES_ARM_NEON_DOT; 19957 for (uint32_t n = 32; n <= 48; n += 16) { 19958 for (size_t k = 1; k <= 40; k += 9) { 19959 for (uint32_t m = 1; m <= 6; m++) { 19960 GemmMicrokernelTester() 19961 .mr(6) 19962 .nr(16) 19963 .kr(4) 19964 .sr(1) 19965 .m(m) 19966 .n(n) 19967 .k(k) 19968 .iterations(1) 19969 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19970 } 19971 } 19972 } 19973 } 19974 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,small_kernel)19975 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, small_kernel) { 19976 TEST_REQUIRES_ARM_NEON_DOT; 19977 for (size_t k = 1; k <= 40; k += 9) { 19978 GemmMicrokernelTester() 19979 .mr(6) 19980 .nr(16) 19981 .kr(4) 19982 .sr(1) 19983 .m(6) 19984 .n(16) 19985 .k(k) 19986 .ks(3) 19987 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19988 } 19989 } 19990 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,small_kernel_subtile)19991 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, small_kernel_subtile) { 19992 TEST_REQUIRES_ARM_NEON_DOT; 19993 for (size_t k = 1; k <= 40; k += 9) { 19994 for (uint32_t n = 1; n <= 16; n++) { 19995 for (uint32_t m = 1; m <= 6; m++) { 19996 GemmMicrokernelTester() 19997 .mr(6) 19998 .nr(16) 19999 .kr(4) 20000 .sr(1) 20001 .m(m) 20002 .n(n) 20003 .k(k) 20004 .ks(3) 20005 .iterations(1) 20006 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20007 } 20008 } 20009 } 20010 } 20011 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_gt_16_small_kernel)20012 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_small_kernel) { 20013 TEST_REQUIRES_ARM_NEON_DOT; 20014 for (uint32_t n = 17; n < 32; n++) { 20015 for (size_t k = 1; k <= 40; k += 9) { 20016 GemmMicrokernelTester() 20017 .mr(6) 20018 .nr(16) 20019 .kr(4) 20020 .sr(1) 20021 .m(6) 20022 .n(n) 20023 .k(k) 20024 .ks(3) 20025 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20026 } 20027 } 20028 } 20029 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,n_div_16_small_kernel)20030 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_small_kernel) { 20031 TEST_REQUIRES_ARM_NEON_DOT; 20032 for (uint32_t n = 32; n <= 
48; n += 16) { 20033 for (size_t k = 1; k <= 40; k += 9) { 20034 GemmMicrokernelTester() 20035 .mr(6) 20036 .nr(16) 20037 .kr(4) 20038 .sr(1) 20039 .m(6) 20040 .n(n) 20041 .k(k) 20042 .ks(3) 20043 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20044 } 20045 } 20046 } 20047 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,strided_cm_subtile)20048 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm_subtile) { 20049 TEST_REQUIRES_ARM_NEON_DOT; 20050 for (size_t k = 1; k <= 40; k += 9) { 20051 for (uint32_t n = 1; n <= 16; n++) { 20052 for (uint32_t m = 1; m <= 6; m++) { 20053 GemmMicrokernelTester() 20054 .mr(6) 20055 .nr(16) 20056 .kr(4) 20057 .sr(1) 20058 .m(m) 20059 .n(n) 20060 .k(k) 20061 .cm_stride(19) 20062 .iterations(1) 20063 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20064 } 20065 } 20066 } 20067 } 20068 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,a_offset)20069 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, a_offset) { 20070 TEST_REQUIRES_ARM_NEON_DOT; 20071 for (size_t k = 1; k <= 40; k += 9) { 20072 GemmMicrokernelTester() 20073 .mr(6) 20074 .nr(16) 20075 .kr(4) 20076 .sr(1) 20077 .m(6) 20078 .n(16) 20079 .k(k) 20080 .ks(3) 20081 .a_offset(251) 20082 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20083 } 20084 } 20085 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,zero)20086 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, zero) { 20087 TEST_REQUIRES_ARM_NEON_DOT; 20088 for (size_t k = 1; k <= 40; k += 9) { 20089 for (uint32_t mz = 0; mz < 6; mz++) { 20090 GemmMicrokernelTester() 20091 .mr(6) 20092 .nr(16) 20093 .kr(4) 20094 .sr(1) 20095 .m(6) 20096 .n(16) 20097 .k(k) 20098 .ks(3) 20099 .a_offset(251) 20100 .zero_index(mz) 20101 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20102 } 20103 } 20104 } 20105 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,qmin)20106 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmin) { 20107 TEST_REQUIRES_ARM_NEON_DOT; 20108 GemmMicrokernelTester() 20109 .mr(6) 20110 .nr(16) 20111 .kr(4) 20112 .sr(1) 20113 .m(6) 20114 .n(16) 20115 .k(8) 20116 .qmin(128) 20117 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20118 } 20119 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,qmax)20120 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmax) { 20121 TEST_REQUIRES_ARM_NEON_DOT; 20122 GemmMicrokernelTester() 20123 .mr(6) 20124 .nr(16) 20125 .kr(4) 20126 .sr(1) 20127 .m(6) 20128 .n(16) 20129 .k(8) 20130 .qmax(128) 20131 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20132 } 20133 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT,strided_cm)20134 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm) { 20135 TEST_REQUIRES_ARM_NEON_DOT; 20136 GemmMicrokernelTester() 20137 .mr(6) 20138 .nr(16) 20139 .kr(4) 20140 .sr(1) 20141 .m(6) 20142 .n(16) 20143 .k(8) 20144 .cm_stride(19) 20145 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20146 } 20147 #endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64 20148 20149 20150 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_eq_8)20151 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8) { 20152 TEST_REQUIRES_ARM_NEON; 20153 GemmMicrokernelTester() 20154 .mr(6) 20155 .nr(8) 20156 .kr(1) 20157 .sr(1) 20158 .m(6) 20159 .n(8) 20160 .k(8) 20161 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20162 } 20163 
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,strided_cn)20164 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cn) { 20165 TEST_REQUIRES_ARM_NEON; 20166 GemmMicrokernelTester() 20167 .mr(6) 20168 .nr(8) 20169 .kr(1) 20170 .sr(1) 20171 .m(6) 20172 .n(8) 20173 .k(8) 20174 .cn_stride(11) 20175 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20176 } 20177 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_eq_8_subtile)20178 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile) { 20179 TEST_REQUIRES_ARM_NEON; 20180 for (uint32_t n = 1; n <= 8; n++) { 20181 for (uint32_t m = 1; m <= 6; m++) { 20182 GemmMicrokernelTester() 20183 .mr(6) 20184 .nr(8) 20185 .kr(1) 20186 .sr(1) 20187 .m(m) 20188 .n(n) 20189 .k(8) 20190 .iterations(1) 20191 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20192 } 20193 } 20194 } 20195 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_eq_8_subtile_m)20196 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) { 20197 TEST_REQUIRES_ARM_NEON; 20198 for (uint32_t m = 1; m <= 6; m++) { 20199 GemmMicrokernelTester() 20200 .mr(6) 20201 .nr(8) 20202 .kr(1) 20203 .sr(1) 20204 .m(m) 20205 .n(8) 20206 .k(8) 20207 .iterations(1) 20208 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20209 } 20210 } 20211 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_eq_8_subtile_n)20212 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) { 20213 TEST_REQUIRES_ARM_NEON; 20214 for (uint32_t n = 1; n <= 8; n++) { 20215 GemmMicrokernelTester() 20216 .mr(6) 20217 .nr(8) 20218 .kr(1) 20219 .sr(1) 20220 .m(6) 20221 .n(n) 20222 .k(8) 20223 .iterations(1) 20224 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 20225 } 20226 } 20227 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_lt_8)20228 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8) { 20229 TEST_REQUIRES_ARM_NEON; 20230 for (size_t k = 1; k < 8; k++) { 20231 GemmMicrokernelTester() 20232 .mr(6) 20233 .nr(8) 20234 .kr(1) 20235 .sr(1) 20236 .m(6) 20237 .n(8) 20238 .k(k) 20239 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20240 } 20241 } 20242 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_lt_8_subtile)20243 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8_subtile) { 20244 TEST_REQUIRES_ARM_NEON; 20245 for (size_t k = 1; k < 8; k++) { 20246 for (uint32_t n = 1; n <= 8; n++) { 20247 for (uint32_t m = 1; m <= 6; m++) { 20248 GemmMicrokernelTester() 20249 .mr(6) 20250 .nr(8) 20251 .kr(1) 20252 .sr(1) 20253 .m(m) 20254 .n(n) 20255 .k(k) 20256 .iterations(1) 20257 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20258 } 20259 } 20260 } 20261 } 20262 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_gt_8)20263 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8) { 20264 TEST_REQUIRES_ARM_NEON; 20265 for (size_t k = 9; k < 16; k++) { 20266 GemmMicrokernelTester() 20267 .mr(6) 20268 .nr(8) 20269 .kr(1) 20270 .sr(1) 20271 .m(6) 20272 .n(8) 20273 .k(k) 20274 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20275 } 20276 } 20277 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_gt_8_subtile)20278 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8_subtile) { 20279 TEST_REQUIRES_ARM_NEON; 20280 for (size_t k = 9; k < 16; k++) { 20281 for (uint32_t n = 1; n <= 8; n++) { 20282 for (uint32_t m = 1; m <= 6; m++) { 20283 GemmMicrokernelTester() 20284 .mr(6) 20285 .nr(8) 20286 .kr(1) 20287 .sr(1) 20288 .m(m) 20289 .n(n) 20290 .k(k) 
20291 .iterations(1) 20292 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20293 } 20294 } 20295 } 20296 } 20297 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_div_8)20298 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8) { 20299 TEST_REQUIRES_ARM_NEON; 20300 for (size_t k = 16; k <= 80; k += 8) { 20301 GemmMicrokernelTester() 20302 .mr(6) 20303 .nr(8) 20304 .kr(1) 20305 .sr(1) 20306 .m(6) 20307 .n(8) 20308 .k(k) 20309 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20310 } 20311 } 20312 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,k_div_8_subtile)20313 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8_subtile) { 20314 TEST_REQUIRES_ARM_NEON; 20315 for (size_t k = 16; k <= 80; k += 8) { 20316 for (uint32_t n = 1; n <= 8; n++) { 20317 for (uint32_t m = 1; m <= 6; m++) { 20318 GemmMicrokernelTester() 20319 .mr(6) 20320 .nr(8) 20321 .kr(1) 20322 .sr(1) 20323 .m(m) 20324 .n(n) 20325 .k(k) 20326 .iterations(1) 20327 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20328 } 20329 } 20330 } 20331 } 20332 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_gt_8)20333 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8) { 20334 TEST_REQUIRES_ARM_NEON; 20335 for (uint32_t n = 9; n < 16; n++) { 20336 for (size_t k = 1; k <= 40; k += 9) { 20337 GemmMicrokernelTester() 20338 .mr(6) 20339 .nr(8) 20340 .kr(1) 20341 .sr(1) 20342 .m(6) 20343 .n(n) 20344 .k(k) 20345 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20346 } 20347 } 20348 } 20349 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_gt_8_strided_cn)20350 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) { 20351 TEST_REQUIRES_ARM_NEON; 20352 for 
(uint32_t n = 9; n < 16; n++) { 20353 for (size_t k = 1; k <= 40; k += 9) { 20354 GemmMicrokernelTester() 20355 .mr(6) 20356 .nr(8) 20357 .kr(1) 20358 .sr(1) 20359 .m(6) 20360 .n(n) 20361 .k(k) 20362 .cn_stride(11) 20363 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20364 } 20365 } 20366 } 20367 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_gt_8_subtile)20368 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_subtile) { 20369 TEST_REQUIRES_ARM_NEON; 20370 for (uint32_t n = 9; n < 16; n++) { 20371 for (size_t k = 1; k <= 40; k += 9) { 20372 for (uint32_t m = 1; m <= 6; m++) { 20373 GemmMicrokernelTester() 20374 .mr(6) 20375 .nr(8) 20376 .kr(1) 20377 .sr(1) 20378 .m(m) 20379 .n(n) 20380 .k(k) 20381 .iterations(1) 20382 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20383 } 20384 } 20385 } 20386 } 20387 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_div_8)20388 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8) { 20389 TEST_REQUIRES_ARM_NEON; 20390 for (uint32_t n = 16; n <= 24; n += 8) { 20391 for (size_t k = 1; k <= 40; k += 9) { 20392 GemmMicrokernelTester() 20393 .mr(6) 20394 .nr(8) 20395 .kr(1) 20396 .sr(1) 20397 .m(6) 20398 .n(n) 20399 .k(k) 20400 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20401 } 20402 } 20403 } 20404 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_div_8_strided_cn)20405 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) { 20406 TEST_REQUIRES_ARM_NEON; 20407 for (uint32_t n = 16; n <= 24; n += 8) { 20408 for (size_t k = 1; k <= 40; k += 9) { 20409 GemmMicrokernelTester() 20410 .mr(6) 20411 .nr(8) 20412 .kr(1) 20413 .sr(1) 20414 .m(6) 20415 .n(n) 20416 .k(k) 20417 .cn_stride(11) 20418 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20419 } 20420 } 20421 } 20422 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_div_8_subtile)20423 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_subtile) { 20424 TEST_REQUIRES_ARM_NEON; 20425 for (uint32_t n = 16; n <= 24; n += 8) { 20426 for (size_t k = 1; k <= 40; k += 9) { 20427 for (uint32_t m = 1; m <= 6; m++) { 20428 GemmMicrokernelTester() 20429 .mr(6) 20430 .nr(8) 20431 .kr(1) 20432 .sr(1) 20433 .m(m) 20434 .n(n) 20435 .k(k) 20436 .iterations(1) 20437 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20438 } 20439 } 20440 } 20441 } 20442 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,small_kernel)20443 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, small_kernel) { 20444 TEST_REQUIRES_ARM_NEON; 20445 for (size_t k = 1; k <= 40; k += 9) { 20446 GemmMicrokernelTester() 20447 .mr(6) 20448 .nr(8) 20449 .kr(1) 20450 .sr(1) 20451 .m(6) 20452 .n(8) 20453 .k(k) 20454 .ks(3) 20455 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20456 } 20457 } 20458 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,small_kernel_subtile)20459 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, small_kernel_subtile) { 20460 TEST_REQUIRES_ARM_NEON; 20461 for (size_t k = 1; k <= 40; k += 9) { 20462 for (uint32_t n = 1; n <= 8; n++) { 20463 for (uint32_t m = 1; m <= 6; m++) { 20464 GemmMicrokernelTester() 20465 .mr(6) 20466 .nr(8) 20467 .kr(1) 20468 .sr(1) 20469 .m(m) 20470 .n(n) 20471 .k(k) 20472 .ks(3) 20473 .iterations(1) 20474 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20475 } 20476 } 20477 } 20478 } 20479 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_gt_8_small_kernel)20480 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_small_kernel) { 
20481 TEST_REQUIRES_ARM_NEON; 20482 for (uint32_t n = 9; n < 16; n++) { 20483 for (size_t k = 1; k <= 40; k += 9) { 20484 GemmMicrokernelTester() 20485 .mr(6) 20486 .nr(8) 20487 .kr(1) 20488 .sr(1) 20489 .m(6) 20490 .n(n) 20491 .k(k) 20492 .ks(3) 20493 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20494 } 20495 } 20496 } 20497 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,n_div_8_small_kernel)20498 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_small_kernel) { 20499 TEST_REQUIRES_ARM_NEON; 20500 for (uint32_t n = 16; n <= 24; n += 8) { 20501 for (size_t k = 1; k <= 40; k += 9) { 20502 GemmMicrokernelTester() 20503 .mr(6) 20504 .nr(8) 20505 .kr(1) 20506 .sr(1) 20507 .m(6) 20508 .n(n) 20509 .k(k) 20510 .ks(3) 20511 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20512 } 20513 } 20514 } 20515 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,strided_cm_subtile)20516 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cm_subtile) { 20517 TEST_REQUIRES_ARM_NEON; 20518 for (size_t k = 1; k <= 40; k += 9) { 20519 for (uint32_t n = 1; n <= 8; n++) { 20520 for (uint32_t m = 1; m <= 6; m++) { 20521 GemmMicrokernelTester() 20522 .mr(6) 20523 .nr(8) 20524 .kr(1) 20525 .sr(1) 20526 .m(m) 20527 .n(n) 20528 .k(k) 20529 .cm_stride(11) 20530 .iterations(1) 20531 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20532 } 20533 } 20534 } 20535 } 20536 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,a_offset)20537 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, a_offset) { 20538 TEST_REQUIRES_ARM_NEON; 20539 for (size_t k = 1; k <= 40; k += 9) { 20540 GemmMicrokernelTester() 20541 .mr(6) 20542 .nr(8) 20543 .kr(1) 20544 .sr(1) 20545 .m(6) 20546 .n(8) 20547 .k(k) 20548 .ks(3) 20549 .a_offset(251) 20550 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20551 } 20552 } 20553 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,zero)20554 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, zero) { 20555 TEST_REQUIRES_ARM_NEON; 20556 for (size_t k = 1; k <= 40; k += 9) { 20557 for (uint32_t mz = 0; mz < 6; mz++) { 20558 GemmMicrokernelTester() 20559 .mr(6) 20560 .nr(8) 20561 .kr(1) 20562 .sr(1) 20563 .m(6) 20564 .n(8) 20565 .k(k) 20566 .ks(3) 20567 .a_offset(251) 20568 .zero_index(mz) 20569 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20570 } 20571 } 20572 } 20573 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,qmin)20574 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, qmin) { 20575 TEST_REQUIRES_ARM_NEON; 20576 GemmMicrokernelTester() 20577 .mr(6) 20578 .nr(8) 20579 .kr(1) 20580 .sr(1) 20581 .m(6) 20582 .n(8) 20583 .k(8) 20584 .qmin(128) 20585 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20586 } 20587 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,qmax)20588 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, qmax) { 20589 TEST_REQUIRES_ARM_NEON; 20590 GemmMicrokernelTester() 20591 .mr(6) 20592 .nr(8) 20593 .kr(1) 20594 .sr(1) 20595 .m(6) 20596 .n(8) 20597 .k(8) 20598 .qmax(128) 20599 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20600 } 20601 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE,strided_cm)20602 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cm) { 20603 TEST_REQUIRES_ARM_NEON; 20604 GemmMicrokernelTester() 20605 .mr(6) 20606 .nr(8) 20607 .kr(1) 20608 .sr(1) 20609 .m(6) 20610 .n(8) 20611 .k(8) 20612 .cm_stride(11) 20613 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20614 } 20615 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 20616 20617 20618 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8)20619 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8) { 20620 TEST_REQUIRES_ARM_NEON; 20621 GemmMicrokernelTester() 20622 .mr(4) 20623 .nr(16) 20624 .kr(1) 20625 .sr(1) 20626 .m(4) 20627 .n(16) 20628 .k(8) 20629 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20630 } 20631 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,strided_cn)20632 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cn) { 20633 TEST_REQUIRES_ARM_NEON; 20634 GemmMicrokernelTester() 20635 .mr(4) 20636 .nr(16) 20637 .kr(1) 20638 .sr(1) 20639 .m(4) 20640 .n(16) 20641 .k(8) 20642 .cn_stride(19) 20643 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20644 } 20645 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile)20646 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile) { 20647 TEST_REQUIRES_ARM_NEON; 20648 for (uint32_t n = 1; n <= 16; n++) { 20649 for (uint32_t m = 1; m <= 4; m++) { 20650 GemmMicrokernelTester() 20651 .mr(4) 20652 .nr(16) 20653 .kr(1) 20654 .sr(1) 20655 .m(m) 20656 .n(n) 20657 .k(8) 20658 .iterations(1) 20659 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20660 } 20661 } 20662 } 20663 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile_m)20664 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 20665 TEST_REQUIRES_ARM_NEON; 20666 for (uint32_t m = 1; m <= 4; m++) { 20667 GemmMicrokernelTester() 20668 .mr(4) 20669 .nr(16) 20670 .kr(1) 20671 .sr(1) 20672 .m(m) 20673 .n(16) 20674 .k(8) 20675 .iterations(1) 20676 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20677 } 20678 } 20679 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile_n)20680 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 20681 TEST_REQUIRES_ARM_NEON; 20682 for (uint32_t n = 1; n <= 16; n++) { 20683 GemmMicrokernelTester() 20684 .mr(4) 20685 .nr(16) 20686 .kr(1) 20687 .sr(1) 20688 .m(4) 20689 .n(n) 20690 .k(8) 20691 .iterations(1) 20692 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20693 } 20694 } 20695 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_lt_8)20696 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8) { 20697 TEST_REQUIRES_ARM_NEON; 20698 for (size_t k = 1; k < 8; k++) { 20699 GemmMicrokernelTester() 20700 .mr(4) 20701 .nr(16) 20702 .kr(1) 20703 .sr(1) 20704 .m(4) 20705 .n(16) 20706 .k(k) 20707 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20708 } 20709 } 20710 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_lt_8_subtile)20711 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_subtile) { 20712 TEST_REQUIRES_ARM_NEON; 20713 for (size_t k = 1; k < 8; k++) { 20714 for (uint32_t n = 1; n <= 16; n++) { 20715 for (uint32_t m = 1; m <= 4; m++) { 20716 GemmMicrokernelTester() 20717 .mr(4) 20718 .nr(16) 20719 .kr(1) 20720 .sr(1) 20721 .m(m) 20722 .n(n) 20723 .k(k) 20724 .iterations(1) 20725 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20726 } 20727 } 20728 } 20729 } 20730 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_gt_8)20731 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8) { 20732 TEST_REQUIRES_ARM_NEON; 20733 for (size_t k = 9; k < 16; k++) { 20734 GemmMicrokernelTester() 20735 .mr(4) 20736 
.nr(16) 20737 .kr(1) 20738 .sr(1) 20739 .m(4) 20740 .n(16) 20741 .k(k) 20742 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20743 } 20744 } 20745 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_gt_8_subtile)20746 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_subtile) { 20747 TEST_REQUIRES_ARM_NEON; 20748 for (size_t k = 9; k < 16; k++) { 20749 for (uint32_t n = 1; n <= 16; n++) { 20750 for (uint32_t m = 1; m <= 4; m++) { 20751 GemmMicrokernelTester() 20752 .mr(4) 20753 .nr(16) 20754 .kr(1) 20755 .sr(1) 20756 .m(m) 20757 .n(n) 20758 .k(k) 20759 .iterations(1) 20760 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20761 } 20762 } 20763 } 20764 } 20765 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_div_8)20766 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8) { 20767 TEST_REQUIRES_ARM_NEON; 20768 for (size_t k = 16; k <= 80; k += 8) { 20769 GemmMicrokernelTester() 20770 .mr(4) 20771 .nr(16) 20772 .kr(1) 20773 .sr(1) 20774 .m(4) 20775 .n(16) 20776 .k(k) 20777 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20778 } 20779 } 20780 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_div_8_subtile)20781 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_subtile) { 20782 TEST_REQUIRES_ARM_NEON; 20783 for (size_t k = 16; k <= 80; k += 8) { 20784 for (uint32_t n = 1; n <= 16; n++) { 20785 for (uint32_t m = 1; m <= 4; m++) { 20786 GemmMicrokernelTester() 20787 .mr(4) 20788 .nr(16) 20789 .kr(1) 20790 .sr(1) 20791 .m(m) 20792 .n(n) 20793 .k(k) 20794 .iterations(1) 20795 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20796 } 20797 } 20798 } 20799 } 20800 
// m = 4, every n in [17, 31], k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every n in [17, 31], k = 1, 10, ..., 37 and m in [1, 4]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 32 and 48, k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1, 10, ..., 37, every m in [1, 4]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n = 16, k = 1, 10, ..., 37, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
// ks = 3 with k = 1, 10, ..., 37, every n in [1, 16] and m in [1, 4]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks = 3, m = 4, every n in [17, 31], k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks = 3, m = 4, n = 32 and 48, k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride = 19 with k = 1, 10, ..., 37, every n in [1, 16] and m in [1, 4]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 and a_offset = 163, m = 4, n = 16, k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same configuration as a_offset, repeated with zero_index set to each mz in [0, 3].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, n = 16, k = 8, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,qmax)21056 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmax) { 21057 TEST_REQUIRES_ARM_NEON; 21058 GemmMicrokernelTester() 21059 .mr(4) 21060 .nr(16) 21061 .kr(1) 21062 .sr(1) 21063 .m(4) 21064 .n(16) 21065 .k(8) 21066 .qmax(128) 21067 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21068 } 21069 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,strided_cm)21070 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm) { 21071 TEST_REQUIRES_ARM_NEON; 21072 GemmMicrokernelTester() 21073 .mr(4) 21074 .nr(16) 21075 .kr(1) 21076 .sr(1) 21077 .m(4) 21078 .n(16) 21079 .k(8) 21080 .cm_stride(19) 21081 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21082 } 21083 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 21084 21085 21086 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_eq_8)21087 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8) { 21088 TEST_REQUIRES_ARM_NEON; 21089 GemmMicrokernelTester() 21090 .mr(6) 21091 .nr(16) 21092 .kr(1) 21093 .sr(1) 21094 .m(6) 21095 .n(16) 21096 .k(8) 21097 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21098 } 21099 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,strided_cn)21100 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cn) { 21101 TEST_REQUIRES_ARM_NEON; 21102 GemmMicrokernelTester() 21103 .mr(6) 21104 .nr(16) 21105 .kr(1) 21106 .sr(1) 21107 .m(6) 21108 .n(16) 21109 .k(8) 21110 .cn_stride(19) 21111 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21112 } 21113 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_eq_8_subtile)21114 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, 
k_eq_8_subtile) { 21115 TEST_REQUIRES_ARM_NEON; 21116 for (uint32_t n = 1; n <= 16; n++) { 21117 for (uint32_t m = 1; m <= 6; m++) { 21118 GemmMicrokernelTester() 21119 .mr(6) 21120 .nr(16) 21121 .kr(1) 21122 .sr(1) 21123 .m(m) 21124 .n(n) 21125 .k(8) 21126 .iterations(1) 21127 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21128 } 21129 } 21130 } 21131 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_eq_8_subtile_m)21132 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 21133 TEST_REQUIRES_ARM_NEON; 21134 for (uint32_t m = 1; m <= 6; m++) { 21135 GemmMicrokernelTester() 21136 .mr(6) 21137 .nr(16) 21138 .kr(1) 21139 .sr(1) 21140 .m(m) 21141 .n(16) 21142 .k(8) 21143 .iterations(1) 21144 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21145 } 21146 } 21147 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_eq_8_subtile_n)21148 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 21149 TEST_REQUIRES_ARM_NEON; 21150 for (uint32_t n = 1; n <= 16; n++) { 21151 GemmMicrokernelTester() 21152 .mr(6) 21153 .nr(16) 21154 .kr(1) 21155 .sr(1) 21156 .m(6) 21157 .n(n) 21158 .k(8) 21159 .iterations(1) 21160 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21161 } 21162 } 21163 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,k_lt_8)21164 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8) { 21165 TEST_REQUIRES_ARM_NEON; 21166 for (size_t k = 1; k < 8; k++) { 21167 GemmMicrokernelTester() 21168 .mr(6) 21169 .nr(16) 21170 .kr(1) 21171 .sr(1) 21172 .m(6) 21173 .n(16) 21174 .k(k) 21175 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21176 } 21177 } 21178 
// Every k in [1, 7], n in [1, 16] and m in [1, 6]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 6, n = 16, for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(6)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every k in [9, 15], n in [1, 16] and m in [1, 6]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 6, n = 16, for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(6)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 with every n in [1, 16] and m in [1, 6]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 6, every n in [17, 31], k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
// Every n in [17, 31], k = 1, 10, ..., 37 and m in [1, 6]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 6, n = 32 and 48, k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1, 10, ..., 37, every m in [1, 6]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 6, n = 16, k = 1, 10, ..., 37, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(6)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks = 3 with k = 1, 10, ..., 37, every n in [1, 16] and m in [1, 6]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks = 3, m = 6, every n in [17, 31], k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,n_div_16_small_kernel)21434 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_small_kernel) { 21435 TEST_REQUIRES_ARM_NEON; 21436 for (uint32_t n = 32; n <= 48; n += 16) { 21437 for (size_t k = 1; k <= 40; k += 9) { 21438 GemmMicrokernelTester() 21439 .mr(6) 21440 .nr(16) 21441 .kr(1) 21442 .sr(1) 21443 .m(6) 21444 .n(n) 21445 .k(k) 21446 .ks(3) 21447 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21448 } 21449 } 21450 } 21451 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,strided_cm_subtile)21452 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm_subtile) { 21453 TEST_REQUIRES_ARM_NEON; 21454 for (size_t k = 1; k <= 40; k += 9) { 21455 for (uint32_t n = 1; n <= 16; n++) { 21456 for (uint32_t m = 1; m <= 6; m++) { 21457 GemmMicrokernelTester() 21458 .mr(6) 21459 .nr(16) 21460 .kr(1) 21461 .sr(1) 21462 .m(m) 21463 .n(n) 21464 .k(k) 21465 .cm_stride(19) 21466 .iterations(1) 21467 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21468 } 21469 } 21470 } 21471 } 21472 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,a_offset)21473 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, a_offset) { 21474 TEST_REQUIRES_ARM_NEON; 21475 for (size_t k = 1; k <= 40; k += 9) { 21476 GemmMicrokernelTester() 21477 .mr(6) 21478 .nr(16) 21479 .kr(1) 21480 .sr(1) 21481 .m(6) 21482 .n(16) 21483 .k(k) 21484 .ks(3) 21485 .a_offset(251) 21486 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21487 } 21488 } 21489 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,zero)21490 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, zero) { 21491 TEST_REQUIRES_ARM_NEON; 21492 for (size_t k = 1; k <= 40; k += 9) { 21493 for (uint32_t mz = 0; mz < 6; mz++) { 21494 
GemmMicrokernelTester() 21495 .mr(6) 21496 .nr(16) 21497 .kr(1) 21498 .sr(1) 21499 .m(6) 21500 .n(16) 21501 .k(k) 21502 .ks(3) 21503 .a_offset(251) 21504 .zero_index(mz) 21505 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21506 } 21507 } 21508 } 21509 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,qmin)21510 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmin) { 21511 TEST_REQUIRES_ARM_NEON; 21512 GemmMicrokernelTester() 21513 .mr(6) 21514 .nr(16) 21515 .kr(1) 21516 .sr(1) 21517 .m(6) 21518 .n(16) 21519 .k(8) 21520 .qmin(128) 21521 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21522 } 21523 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,qmax)21524 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmax) { 21525 TEST_REQUIRES_ARM_NEON; 21526 GemmMicrokernelTester() 21527 .mr(6) 21528 .nr(16) 21529 .kr(1) 21530 .sr(1) 21531 .m(6) 21532 .n(16) 21533 .k(8) 21534 .qmax(128) 21535 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21536 } 21537 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE,strided_cm)21538 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm) { 21539 TEST_REQUIRES_ARM_NEON; 21540 GemmMicrokernelTester() 21541 .mr(6) 21542 .nr(16) 21543 .kr(1) 21544 .sr(1) 21545 .m(6) 21546 .n(16) 21547 .k(8) 21548 .cm_stride(19) 21549 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21550 } 21551 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 21552 21553 21554 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8)21555 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) { 21556 TEST_REQUIRES_ARM_NEON; 21557 GemmMicrokernelTester() 21558 .mr(2) 21559 .nr(16) 21560 .kr(1) 
21561 .sr(1) 21562 .m(2) 21563 .n(16) 21564 .k(8) 21565 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21566 } 21567 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,strided_cn)21568 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cn) { 21569 TEST_REQUIRES_ARM_NEON; 21570 GemmMicrokernelTester() 21571 .mr(2) 21572 .nr(16) 21573 .kr(1) 21574 .sr(1) 21575 .m(2) 21576 .n(16) 21577 .k(8) 21578 .cn_stride(19) 21579 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21580 } 21581 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile)21582 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) { 21583 TEST_REQUIRES_ARM_NEON; 21584 for (uint32_t n = 1; n <= 16; n++) { 21585 for (uint32_t m = 1; m <= 2; m++) { 21586 GemmMicrokernelTester() 21587 .mr(2) 21588 .nr(16) 21589 .kr(1) 21590 .sr(1) 21591 .m(m) 21592 .n(n) 21593 .k(8) 21594 .iterations(1) 21595 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21596 } 21597 } 21598 } 21599 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_m)21600 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) { 21601 TEST_REQUIRES_ARM_NEON; 21602 for (uint32_t m = 1; m <= 2; m++) { 21603 GemmMicrokernelTester() 21604 .mr(2) 21605 .nr(16) 21606 .kr(1) 21607 .sr(1) 21608 .m(m) 21609 .n(16) 21610 .k(8) 21611 .iterations(1) 21612 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21613 } 21614 } 21615 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_n)21616 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) { 21617 TEST_REQUIRES_ARM_NEON; 21618 for (uint32_t n = 1; n 
<= 16; n++) { 21619 GemmMicrokernelTester() 21620 .mr(2) 21621 .nr(16) 21622 .kr(1) 21623 .sr(1) 21624 .m(2) 21625 .n(n) 21626 .k(8) 21627 .iterations(1) 21628 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21629 } 21630 } 21631 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_lt_8)21632 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) { 21633 TEST_REQUIRES_ARM_NEON; 21634 for (size_t k = 1; k < 8; k++) { 21635 GemmMicrokernelTester() 21636 .mr(2) 21637 .nr(16) 21638 .kr(1) 21639 .sr(1) 21640 .m(2) 21641 .n(16) 21642 .k(k) 21643 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21644 } 21645 } 21646 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_lt_8_subtile)21647 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) { 21648 TEST_REQUIRES_ARM_NEON; 21649 for (size_t k = 1; k < 8; k++) { 21650 for (uint32_t n = 1; n <= 16; n++) { 21651 for (uint32_t m = 1; m <= 2; m++) { 21652 GemmMicrokernelTester() 21653 .mr(2) 21654 .nr(16) 21655 .kr(1) 21656 .sr(1) 21657 .m(m) 21658 .n(n) 21659 .k(k) 21660 .iterations(1) 21661 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21662 } 21663 } 21664 } 21665 } 21666 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_gt_8)21667 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) { 21668 TEST_REQUIRES_ARM_NEON; 21669 for (size_t k = 9; k < 16; k++) { 21670 GemmMicrokernelTester() 21671 .mr(2) 21672 .nr(16) 21673 .kr(1) 21674 .sr(1) 21675 .m(2) 21676 .n(16) 21677 .k(k) 21678 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21679 } 21680 } 21681 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_gt_8_subtile)21682 
// Every k in [9, 15], n in [1, 16] and m in [1, 2]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 2, n = 16, for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 with every n in [1, 16] and m in [1, 2]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 2, every n in [17, 31], k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every n in [17, 31], k = 1, 10, ..., 37 and m in [1, 2]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 2, n = 32 and 48, k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
21808 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_div_16_strided_cn)21809 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) { 21810 TEST_REQUIRES_ARM_NEON; 21811 for (uint32_t n = 32; n <= 48; n += 16) { 21812 for (size_t k = 1; k <= 40; k += 9) { 21813 GemmMicrokernelTester() 21814 .mr(2) 21815 .nr(16) 21816 .kr(1) 21817 .sr(1) 21818 .m(2) 21819 .n(n) 21820 .k(k) 21821 .cn_stride(19) 21822 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21823 } 21824 } 21825 } 21826 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_div_16_subtile)21827 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) { 21828 TEST_REQUIRES_ARM_NEON; 21829 for (uint32_t n = 32; n <= 48; n += 16) { 21830 for (size_t k = 1; k <= 40; k += 9) { 21831 for (uint32_t m = 1; m <= 2; m++) { 21832 GemmMicrokernelTester() 21833 .mr(2) 21834 .nr(16) 21835 .kr(1) 21836 .sr(1) 21837 .m(m) 21838 .n(n) 21839 .k(k) 21840 .iterations(1) 21841 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21842 } 21843 } 21844 } 21845 } 21846 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,small_kernel)21847 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, small_kernel) { 21848 TEST_REQUIRES_ARM_NEON; 21849 for (size_t k = 1; k <= 40; k += 9) { 21850 GemmMicrokernelTester() 21851 .mr(2) 21852 .nr(16) 21853 .kr(1) 21854 .sr(1) 21855 .m(2) 21856 .n(16) 21857 .k(k) 21858 .ks(3) 21859 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21860 } 21861 } 21862 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,small_kernel_subtile)21863 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) { 21864 TEST_REQUIRES_ARM_NEON; 21865 for (size_t k = 1; k <= 40; k += 9) { 21866 
for (uint32_t n = 1; n <= 16; n++) { 21867 for (uint32_t m = 1; m <= 2; m++) { 21868 GemmMicrokernelTester() 21869 .mr(2) 21870 .nr(16) 21871 .kr(1) 21872 .sr(1) 21873 .m(m) 21874 .n(n) 21875 .k(k) 21876 .ks(3) 21877 .iterations(1) 21878 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21879 } 21880 } 21881 } 21882 } 21883 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_gt_16_small_kernel)21884 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) { 21885 TEST_REQUIRES_ARM_NEON; 21886 for (uint32_t n = 17; n < 32; n++) { 21887 for (size_t k = 1; k <= 40; k += 9) { 21888 GemmMicrokernelTester() 21889 .mr(2) 21890 .nr(16) 21891 .kr(1) 21892 .sr(1) 21893 .m(2) 21894 .n(n) 21895 .k(k) 21896 .ks(3) 21897 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21898 } 21899 } 21900 } 21901 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_div_16_small_kernel)21902 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) { 21903 TEST_REQUIRES_ARM_NEON; 21904 for (uint32_t n = 32; n <= 48; n += 16) { 21905 for (size_t k = 1; k <= 40; k += 9) { 21906 GemmMicrokernelTester() 21907 .mr(2) 21908 .nr(16) 21909 .kr(1) 21910 .sr(1) 21911 .m(2) 21912 .n(n) 21913 .k(k) 21914 .ks(3) 21915 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21916 } 21917 } 21918 } 21919 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,strided_cm_subtile)21920 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) { 21921 TEST_REQUIRES_ARM_NEON; 21922 for (size_t k = 1; k <= 40; k += 9) { 21923 for (uint32_t n = 1; n <= 16; n++) { 21924 for (uint32_t m = 1; m <= 2; m++) { 21925 GemmMicrokernelTester() 21926 .mr(2) 21927 .nr(16) 21928 .kr(1) 21929 .sr(1) 21930 .m(m) 
21931 .n(n) 21932 .k(k) 21933 .cm_stride(19) 21934 .iterations(1) 21935 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21936 } 21937 } 21938 } 21939 } 21940 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,a_offset)21941 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, a_offset) { 21942 TEST_REQUIRES_ARM_NEON; 21943 for (size_t k = 1; k <= 40; k += 9) { 21944 GemmMicrokernelTester() 21945 .mr(2) 21946 .nr(16) 21947 .kr(1) 21948 .sr(1) 21949 .m(2) 21950 .n(16) 21951 .k(k) 21952 .ks(3) 21953 .a_offset(83) 21954 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21955 } 21956 } 21957 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,zero)21958 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, zero) { 21959 TEST_REQUIRES_ARM_NEON; 21960 for (size_t k = 1; k <= 40; k += 9) { 21961 for (uint32_t mz = 0; mz < 2; mz++) { 21962 GemmMicrokernelTester() 21963 .mr(2) 21964 .nr(16) 21965 .kr(1) 21966 .sr(1) 21967 .m(2) 21968 .n(16) 21969 .k(k) 21970 .ks(3) 21971 .a_offset(83) 21972 .zero_index(mz) 21973 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21974 } 21975 } 21976 } 21977 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,qmin)21978 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, qmin) { 21979 TEST_REQUIRES_ARM_NEON; 21980 GemmMicrokernelTester() 21981 .mr(2) 21982 .nr(16) 21983 .kr(1) 21984 .sr(1) 21985 .m(2) 21986 .n(16) 21987 .k(8) 21988 .qmin(128) 21989 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21990 } 21991 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,qmax)21992 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, qmax) { 21993 TEST_REQUIRES_ARM_NEON; 21994 
GemmMicrokernelTester() 21995 .mr(2) 21996 .nr(16) 21997 .kr(1) 21998 .sr(1) 21999 .m(2) 22000 .n(16) 22001 .k(8) 22002 .qmax(128) 22003 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22004 } 22005 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,strided_cm)22006 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cm) { 22007 TEST_REQUIRES_ARM_NEON; 22008 GemmMicrokernelTester() 22009 .mr(2) 22010 .nr(16) 22011 .kr(1) 22012 .sr(1) 22013 .m(2) 22014 .n(16) 22015 .k(8) 22016 .cm_stride(19) 22017 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22018 } 22019 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 22020 22021 22022 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,k_eq_8)22023 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) { 22024 TEST_REQUIRES_ARM_NEON_DOT; 22025 GemmMicrokernelTester() 22026 .mr(4) 22027 .nr(8) 22028 .kr(4) 22029 .sr(1) 22030 .m(4) 22031 .n(8) 22032 .k(8) 22033 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22034 } 22035 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,strided_cn)22036 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) { 22037 TEST_REQUIRES_ARM_NEON_DOT; 22038 GemmMicrokernelTester() 22039 .mr(4) 22040 .nr(8) 22041 .kr(4) 22042 .sr(1) 22043 .m(4) 22044 .n(8) 22045 .k(8) 22046 .cn_stride(11) 22047 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22048 } 22049 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,k_eq_8_subtile)22050 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) { 
22051 TEST_REQUIRES_ARM_NEON_DOT; 22052 for (uint32_t n = 1; n <= 8; n++) { 22053 for (uint32_t m = 1; m <= 4; m++) { 22054 GemmMicrokernelTester() 22055 .mr(4) 22056 .nr(8) 22057 .kr(4) 22058 .sr(1) 22059 .m(m) 22060 .n(n) 22061 .k(8) 22062 .iterations(1) 22063 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22064 } 22065 } 22066 } 22067 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,k_eq_8_subtile_m)22068 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) { 22069 TEST_REQUIRES_ARM_NEON_DOT; 22070 for (uint32_t m = 1; m <= 4; m++) { 22071 GemmMicrokernelTester() 22072 .mr(4) 22073 .nr(8) 22074 .kr(4) 22075 .sr(1) 22076 .m(m) 22077 .n(8) 22078 .k(8) 22079 .iterations(1) 22080 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22081 } 22082 } 22083 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,k_eq_8_subtile_n)22084 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) { 22085 TEST_REQUIRES_ARM_NEON_DOT; 22086 for (uint32_t n = 1; n <= 8; n++) { 22087 GemmMicrokernelTester() 22088 .mr(4) 22089 .nr(8) 22090 .kr(4) 22091 .sr(1) 22092 .m(4) 22093 .n(n) 22094 .k(8) 22095 .iterations(1) 22096 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22097 } 22098 } 22099 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,k_lt_8)22100 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) { 22101 TEST_REQUIRES_ARM_NEON_DOT; 22102 for (size_t k = 1; k < 8; k++) { 22103 GemmMicrokernelTester() 22104 .mr(4) 22105 .nr(8) 22106 .kr(4) 22107 .sr(1) 22108 .m(4) 22109 .n(8) 22110 .k(k) 22111 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 22112 } 22113 } 22114 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,k_lt_8_subtile)22115 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) { 22116 TEST_REQUIRES_ARM_NEON_DOT; 22117 for (size_t k = 1; k < 8; k++) { 22118 for (uint32_t n = 1; n <= 8; n++) { 22119 for (uint32_t m = 1; m <= 4; m++) { 22120 GemmMicrokernelTester() 22121 .mr(4) 22122 .nr(8) 22123 .kr(4) 22124 .sr(1) 22125 .m(m) 22126 .n(n) 22127 .k(k) 22128 .iterations(1) 22129 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22130 } 22131 } 22132 } 22133 } 22134 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,k_gt_8)22135 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) { 22136 TEST_REQUIRES_ARM_NEON_DOT; 22137 for (size_t k = 9; k < 16; k++) { 22138 GemmMicrokernelTester() 22139 .mr(4) 22140 .nr(8) 22141 .kr(4) 22142 .sr(1) 22143 .m(4) 22144 .n(8) 22145 .k(k) 22146 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22147 } 22148 } 22149 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,k_gt_8_subtile)22150 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) { 22151 TEST_REQUIRES_ARM_NEON_DOT; 22152 for (size_t k = 9; k < 16; k++) { 22153 for (uint32_t n = 1; n <= 8; n++) { 22154 for (uint32_t m = 1; m <= 4; m++) { 22155 GemmMicrokernelTester() 22156 .mr(4) 22157 .nr(8) 22158 .kr(4) 22159 .sr(1) 22160 .m(m) 22161 .n(n) 22162 .k(k) 22163 .iterations(1) 22164 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22165 } 22166 } 22167 } 22168 } 22169 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,k_div_8)22170 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) { 22171 
TEST_REQUIRES_ARM_NEON_DOT; 22172 for (size_t k = 16; k <= 80; k += 8) { 22173 GemmMicrokernelTester() 22174 .mr(4) 22175 .nr(8) 22176 .kr(4) 22177 .sr(1) 22178 .m(4) 22179 .n(8) 22180 .k(k) 22181 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22182 } 22183 } 22184 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,k_div_8_subtile)22185 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) { 22186 TEST_REQUIRES_ARM_NEON_DOT; 22187 for (size_t k = 16; k <= 80; k += 8) { 22188 for (uint32_t n = 1; n <= 8; n++) { 22189 for (uint32_t m = 1; m <= 4; m++) { 22190 GemmMicrokernelTester() 22191 .mr(4) 22192 .nr(8) 22193 .kr(4) 22194 .sr(1) 22195 .m(m) 22196 .n(n) 22197 .k(k) 22198 .iterations(1) 22199 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22200 } 22201 } 22202 } 22203 } 22204 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,n_gt_8)22205 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) { 22206 TEST_REQUIRES_ARM_NEON_DOT; 22207 for (uint32_t n = 9; n < 16; n++) { 22208 for (size_t k = 1; k <= 40; k += 9) { 22209 GemmMicrokernelTester() 22210 .mr(4) 22211 .nr(8) 22212 .kr(4) 22213 .sr(1) 22214 .m(4) 22215 .n(n) 22216 .k(k) 22217 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22218 } 22219 } 22220 } 22221 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,n_gt_8_strided_cn)22222 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) { 22223 TEST_REQUIRES_ARM_NEON_DOT; 22224 for (uint32_t n = 9; n < 16; n++) { 22225 for (size_t k = 1; k <= 40; k += 9) { 22226 GemmMicrokernelTester() 22227 .mr(4) 22228 .nr(8) 22229 .kr(4) 22230 .sr(1) 22231 .m(4) 22232 .n(n) 22233 .k(k) 22234 .cn_stride(11) 22235 
.Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22236 } 22237 } 22238 } 22239 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,n_gt_8_subtile)22240 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) { 22241 TEST_REQUIRES_ARM_NEON_DOT; 22242 for (uint32_t n = 9; n < 16; n++) { 22243 for (size_t k = 1; k <= 40; k += 9) { 22244 for (uint32_t m = 1; m <= 4; m++) { 22245 GemmMicrokernelTester() 22246 .mr(4) 22247 .nr(8) 22248 .kr(4) 22249 .sr(1) 22250 .m(m) 22251 .n(n) 22252 .k(k) 22253 .iterations(1) 22254 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22255 } 22256 } 22257 } 22258 } 22259 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,n_div_8)22260 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) { 22261 TEST_REQUIRES_ARM_NEON_DOT; 22262 for (uint32_t n = 16; n <= 24; n += 8) { 22263 for (size_t k = 1; k <= 40; k += 9) { 22264 GemmMicrokernelTester() 22265 .mr(4) 22266 .nr(8) 22267 .kr(4) 22268 .sr(1) 22269 .m(4) 22270 .n(n) 22271 .k(k) 22272 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22273 } 22274 } 22275 } 22276 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,n_div_8_strided_cn)22277 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) { 22278 TEST_REQUIRES_ARM_NEON_DOT; 22279 for (uint32_t n = 16; n <= 24; n += 8) { 22280 for (size_t k = 1; k <= 40; k += 9) { 22281 GemmMicrokernelTester() 22282 .mr(4) 22283 .nr(8) 22284 .kr(4) 22285 .sr(1) 22286 .m(4) 22287 .n(n) 22288 .k(k) 22289 .cn_stride(11) 22290 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22291 } 22292 } 22293 } 22294 
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,n_div_8_subtile)22295 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) { 22296 TEST_REQUIRES_ARM_NEON_DOT; 22297 for (uint32_t n = 16; n <= 24; n += 8) { 22298 for (size_t k = 1; k <= 40; k += 9) { 22299 for (uint32_t m = 1; m <= 4; m++) { 22300 GemmMicrokernelTester() 22301 .mr(4) 22302 .nr(8) 22303 .kr(4) 22304 .sr(1) 22305 .m(m) 22306 .n(n) 22307 .k(k) 22308 .iterations(1) 22309 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22310 } 22311 } 22312 } 22313 } 22314 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,small_kernel)22315 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, small_kernel) { 22316 TEST_REQUIRES_ARM_NEON_DOT; 22317 for (size_t k = 1; k <= 40; k += 9) { 22318 GemmMicrokernelTester() 22319 .mr(4) 22320 .nr(8) 22321 .kr(4) 22322 .sr(1) 22323 .m(4) 22324 .n(8) 22325 .k(k) 22326 .ks(3) 22327 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22328 } 22329 } 22330 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,small_kernel_subtile)22331 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, small_kernel_subtile) { 22332 TEST_REQUIRES_ARM_NEON_DOT; 22333 for (size_t k = 1; k <= 40; k += 9) { 22334 for (uint32_t n = 1; n <= 8; n++) { 22335 for (uint32_t m = 1; m <= 4; m++) { 22336 GemmMicrokernelTester() 22337 .mr(4) 22338 .nr(8) 22339 .kr(4) 22340 .sr(1) 22341 .m(m) 22342 .n(n) 22343 .k(k) 22344 .ks(3) 22345 .iterations(1) 22346 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22347 } 22348 } 22349 } 22350 } 22351 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,n_gt_8_small_kernel)22352 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, 
n_gt_8_small_kernel) { 22353 TEST_REQUIRES_ARM_NEON_DOT; 22354 for (uint32_t n = 9; n < 16; n++) { 22355 for (size_t k = 1; k <= 40; k += 9) { 22356 GemmMicrokernelTester() 22357 .mr(4) 22358 .nr(8) 22359 .kr(4) 22360 .sr(1) 22361 .m(4) 22362 .n(n) 22363 .k(k) 22364 .ks(3) 22365 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22366 } 22367 } 22368 } 22369 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,n_div_8_small_kernel)22370 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_small_kernel) { 22371 TEST_REQUIRES_ARM_NEON_DOT; 22372 for (uint32_t n = 16; n <= 24; n += 8) { 22373 for (size_t k = 1; k <= 40; k += 9) { 22374 GemmMicrokernelTester() 22375 .mr(4) 22376 .nr(8) 22377 .kr(4) 22378 .sr(1) 22379 .m(4) 22380 .n(n) 22381 .k(k) 22382 .ks(3) 22383 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22384 } 22385 } 22386 } 22387 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,strided_cm_subtile)22388 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) { 22389 TEST_REQUIRES_ARM_NEON_DOT; 22390 for (size_t k = 1; k <= 40; k += 9) { 22391 for (uint32_t n = 1; n <= 8; n++) { 22392 for (uint32_t m = 1; m <= 4; m++) { 22393 GemmMicrokernelTester() 22394 .mr(4) 22395 .nr(8) 22396 .kr(4) 22397 .sr(1) 22398 .m(m) 22399 .n(n) 22400 .k(k) 22401 .cm_stride(11) 22402 .iterations(1) 22403 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22404 } 22405 } 22406 } 22407 } 22408 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,a_offset)22409 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, a_offset) { 22410 TEST_REQUIRES_ARM_NEON_DOT; 22411 for (size_t k = 1; k <= 40; k += 9) { 22412 GemmMicrokernelTester() 22413 .mr(4) 22414 
.nr(8) 22415 .kr(4) 22416 .sr(1) 22417 .m(4) 22418 .n(8) 22419 .k(k) 22420 .ks(3) 22421 .a_offset(163) 22422 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22423 } 22424 } 22425 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,zero)22426 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, zero) { 22427 TEST_REQUIRES_ARM_NEON_DOT; 22428 for (size_t k = 1; k <= 40; k += 9) { 22429 for (uint32_t mz = 0; mz < 4; mz++) { 22430 GemmMicrokernelTester() 22431 .mr(4) 22432 .nr(8) 22433 .kr(4) 22434 .sr(1) 22435 .m(4) 22436 .n(8) 22437 .k(k) 22438 .ks(3) 22439 .a_offset(163) 22440 .zero_index(mz) 22441 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22442 } 22443 } 22444 } 22445 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,qmin)22446 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, qmin) { 22447 TEST_REQUIRES_ARM_NEON_DOT; 22448 GemmMicrokernelTester() 22449 .mr(4) 22450 .nr(8) 22451 .kr(4) 22452 .sr(1) 22453 .m(4) 22454 .n(8) 22455 .k(8) 22456 .qmin(128) 22457 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22458 } 22459 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,qmax)22460 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, qmax) { 22461 TEST_REQUIRES_ARM_NEON_DOT; 22462 GemmMicrokernelTester() 22463 .mr(4) 22464 .nr(8) 22465 .kr(4) 22466 .sr(1) 22467 .m(4) 22468 .n(8) 22469 .k(8) 22470 .qmax(128) 22471 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22472 } 22473 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64,strided_cm)22474 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) { 22475 
TEST_REQUIRES_ARM_NEON_DOT; 22476 GemmMicrokernelTester() 22477 .mr(4) 22478 .nr(8) 22479 .kr(4) 22480 .sr(1) 22481 .m(4) 22482 .n(8) 22483 .k(8) 22484 .cm_stride(11) 22485 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22486 } 22487 #endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT 22488