1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/qs8-gemm-minmax-rndnu.yaml 11 // Generator: tools/generate-gemm-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/allocator.h> 17 #include <xnnpack/common.h> 18 #include <xnnpack/isa-checks.h> 19 20 #include <xnnpack/gemm.h> 21 #include <xnnpack/igemm.h> 22 #include <xnnpack/ppmm.h> 23 #include "gemm-microkernel-tester.h" 24 25 26 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8)27 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) { 28 TEST_REQUIRES_ARM_NEON; 29 GemmMicrokernelTester() 30 .mr(4) 31 .nr(8) 32 .kr(1) 33 .sr(1) 34 .m(4) 35 .n(8) 36 .k(8) 37 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 38 } 39 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cn)40 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) { 41 TEST_REQUIRES_ARM_NEON; 42 GemmMicrokernelTester() 43 .mr(4) 44 .nr(8) 45 .kr(1) 46 .sr(1) 47 .m(4) 48 .n(8) 49 .k(8) 50 .cn_stride(11) 51 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 52 } 53 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_strided_a)54 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) { 55 TEST_REQUIRES_ARM_NEON; 56 GemmMicrokernelTester() 57 .mr(4) 58 .nr(8) 59 .kr(1) 60 .sr(1) 61 .m(4) 62 .n(8) 63 .k(8) 64 .a_stride(11) 65 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 66 } 67 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile)68 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) { 69 TEST_REQUIRES_ARM_NEON; 70 for (uint32_t n = 1; n <= 8; n++) { 71 for (uint32_t m = 1; m <= 4; m++) { 72 GemmMicrokernelTester() 73 .mr(4) 74 .nr(8) 75 .kr(1) 76 .sr(1) 77 .m(m) 78 .n(n) 79 .k(8) 80 .iterations(1) 81 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 82 } 83 } 84 } 85 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile_m)86 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) { 87 TEST_REQUIRES_ARM_NEON; 88 for (uint32_t m = 1; m <= 4; m++) { 89 GemmMicrokernelTester() 90 .mr(4) 91 .nr(8) 92 .kr(1) 93 .sr(1) 94 .m(m) 95 .n(8) 96 .k(8) 97 .iterations(1) 98 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 99 } 100 } 101 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile_n)102 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) { 103 TEST_REQUIRES_ARM_NEON; 104 for (uint32_t n = 1; n <= 8; n++) { 105 GemmMicrokernelTester() 106 .mr(4) 107 .nr(8) 108 .kr(1) 109 .sr(1) 110 .m(4) 111 .n(n) 112 .k(8) 113 .iterations(1) 114 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 115 } 116 } 117 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_lt_8)118 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) { 119 TEST_REQUIRES_ARM_NEON; 120 for (size_t k = 1; k < 8; k++) { 121 GemmMicrokernelTester() 122 .mr(4) 123 .nr(8) 124 .kr(1) 125 .sr(1) 126 .m(4) 127 .n(8) 128 
.k(k) 129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 130 } 131 } 132 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_lt_8_strided_a)133 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) { 134 TEST_REQUIRES_ARM_NEON; 135 for (size_t k = 1; k < 8; k++) { 136 GemmMicrokernelTester() 137 .mr(4) 138 .nr(8) 139 .kr(1) 140 .sr(1) 141 .m(4) 142 .n(8) 143 .k(k) 144 .a_stride(11) 145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 146 } 147 } 148 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_lt_8_subtile)149 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) { 150 TEST_REQUIRES_ARM_NEON; 151 for (size_t k = 1; k < 8; k++) { 152 for (uint32_t n = 1; n <= 8; n++) { 153 for (uint32_t m = 1; m <= 4; m++) { 154 GemmMicrokernelTester() 155 .mr(4) 156 .nr(8) 157 .kr(1) 158 .sr(1) 159 .m(m) 160 .n(n) 161 .k(k) 162 .iterations(1) 163 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 164 } 165 } 166 } 167 } 168 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_gt_8)169 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) { 170 TEST_REQUIRES_ARM_NEON; 171 for (size_t k = 9; k < 16; k++) { 172 GemmMicrokernelTester() 173 .mr(4) 174 .nr(8) 175 .kr(1) 176 .sr(1) 177 .m(4) 178 .n(8) 179 .k(k) 180 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 181 } 182 } 183 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_gt_8_strided_a)184 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) { 185 
TEST_REQUIRES_ARM_NEON; 186 for (size_t k = 9; k < 16; k++) { 187 GemmMicrokernelTester() 188 .mr(4) 189 .nr(8) 190 .kr(1) 191 .sr(1) 192 .m(4) 193 .n(8) 194 .k(k) 195 .a_stride(19) 196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 197 } 198 } 199 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_gt_8_subtile)200 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) { 201 TEST_REQUIRES_ARM_NEON; 202 for (size_t k = 9; k < 16; k++) { 203 for (uint32_t n = 1; n <= 8; n++) { 204 for (uint32_t m = 1; m <= 4; m++) { 205 GemmMicrokernelTester() 206 .mr(4) 207 .nr(8) 208 .kr(1) 209 .sr(1) 210 .m(m) 211 .n(n) 212 .k(k) 213 .iterations(1) 214 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 215 } 216 } 217 } 218 } 219 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_div_8)220 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) { 221 TEST_REQUIRES_ARM_NEON; 222 for (size_t k = 16; k <= 80; k += 8) { 223 GemmMicrokernelTester() 224 .mr(4) 225 .nr(8) 226 .kr(1) 227 .sr(1) 228 .m(4) 229 .n(8) 230 .k(k) 231 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 232 } 233 } 234 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_div_8_strided_a)235 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) { 236 TEST_REQUIRES_ARM_NEON; 237 for (size_t k = 16; k <= 80; k += 8) { 238 GemmMicrokernelTester() 239 .mr(4) 240 .nr(8) 241 .kr(1) 242 .sr(1) 243 .m(4) 244 .n(8) 245 .k(k) 246 .a_stride(83) 247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 248 } 249 } 
250 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_div_8_subtile)251 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) { 252 TEST_REQUIRES_ARM_NEON; 253 for (size_t k = 16; k <= 80; k += 8) { 254 for (uint32_t n = 1; n <= 8; n++) { 255 for (uint32_t m = 1; m <= 4; m++) { 256 GemmMicrokernelTester() 257 .mr(4) 258 .nr(8) 259 .kr(1) 260 .sr(1) 261 .m(m) 262 .n(n) 263 .k(k) 264 .iterations(1) 265 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 266 } 267 } 268 } 269 } 270 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8)271 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) { 272 TEST_REQUIRES_ARM_NEON; 273 for (uint32_t n = 9; n < 16; n++) { 274 for (size_t k = 1; k <= 40; k += 9) { 275 GemmMicrokernelTester() 276 .mr(4) 277 .nr(8) 278 .kr(1) 279 .sr(1) 280 .m(4) 281 .n(n) 282 .k(k) 283 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 284 } 285 } 286 } 287 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8_strided_cn)288 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) { 289 TEST_REQUIRES_ARM_NEON; 290 for (uint32_t n = 9; n < 16; n++) { 291 for (size_t k = 1; k <= 40; k += 9) { 292 GemmMicrokernelTester() 293 .mr(4) 294 .nr(8) 295 .kr(1) 296 .sr(1) 297 .m(4) 298 .n(n) 299 .k(k) 300 .cn_stride(11) 301 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 302 } 303 } 304 } 305 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8_strided_a)306 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_a) { 307 TEST_REQUIRES_ARM_NEON; 308 for (uint32_t n = 9; n < 16; n++) { 309 for 
(size_t k = 1; k <= 40; k += 9) { 310 GemmMicrokernelTester() 311 .mr(4) 312 .nr(8) 313 .kr(1) 314 .sr(1) 315 .m(4) 316 .n(n) 317 .k(k) 318 .a_stride(43) 319 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 320 } 321 } 322 } 323 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8_subtile)324 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) { 325 TEST_REQUIRES_ARM_NEON; 326 for (uint32_t n = 9; n < 16; n++) { 327 for (size_t k = 1; k <= 40; k += 9) { 328 for (uint32_t m = 1; m <= 4; m++) { 329 GemmMicrokernelTester() 330 .mr(4) 331 .nr(8) 332 .kr(1) 333 .sr(1) 334 .m(m) 335 .n(n) 336 .k(k) 337 .iterations(1) 338 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 339 } 340 } 341 } 342 } 343 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8)344 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) { 345 TEST_REQUIRES_ARM_NEON; 346 for (uint32_t n = 16; n <= 24; n += 8) { 347 for (size_t k = 1; k <= 40; k += 9) { 348 GemmMicrokernelTester() 349 .mr(4) 350 .nr(8) 351 .kr(1) 352 .sr(1) 353 .m(4) 354 .n(n) 355 .k(k) 356 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 357 } 358 } 359 } 360 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8_strided_cn)361 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) { 362 TEST_REQUIRES_ARM_NEON; 363 for (uint32_t n = 16; n <= 24; n += 8) { 364 for (size_t k = 1; k <= 40; k += 9) { 365 GemmMicrokernelTester() 366 .mr(4) 367 .nr(8) 368 .kr(1) 369 .sr(1) 370 .m(4) 371 .n(n) 372 .k(k) 373 .cn_stride(11) 374 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 375 } 376 } 377 } 378 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8_strided_a)379 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_a) { 380 TEST_REQUIRES_ARM_NEON; 381 for (uint32_t n = 16; n <= 24; n += 8) { 382 for (size_t k = 1; k <= 40; k += 9) { 383 GemmMicrokernelTester() 384 .mr(4) 385 .nr(8) 386 .kr(1) 387 .sr(1) 388 .m(4) 389 .n(n) 390 .k(k) 391 .a_stride(43) 392 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 393 } 394 } 395 } 396 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8_subtile)397 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) { 398 TEST_REQUIRES_ARM_NEON; 399 for (uint32_t n = 16; n <= 24; n += 8) { 400 for (size_t k = 1; k <= 40; k += 9) { 401 for (uint32_t m = 1; m <= 4; m++) { 402 GemmMicrokernelTester() 403 .mr(4) 404 .nr(8) 405 .kr(1) 406 .sr(1) 407 .m(m) 408 .n(n) 409 .k(k) 410 .iterations(1) 411 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 412 } 413 } 414 } 415 } 416 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cm_subtile)417 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) { 418 TEST_REQUIRES_ARM_NEON; 419 for (size_t k = 1; k <= 40; k += 9) { 420 for (uint32_t n = 1; n <= 8; n++) { 421 for (uint32_t m = 1; m <= 4; m++) { 422 GemmMicrokernelTester() 423 .mr(4) 424 .nr(8) 425 .kr(1) 426 .sr(1) 427 .m(m) 428 .n(n) 429 .k(k) 430 .cm_stride(11) 431 .iterations(1) 432 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 433 } 434 } 435 } 436 } 437 
TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,qmin)438 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) { 439 TEST_REQUIRES_ARM_NEON; 440 GemmMicrokernelTester() 441 .mr(4) 442 .nr(8) 443 .kr(1) 444 .sr(1) 445 .m(4) 446 .n(8) 447 .k(8) 448 .qmin(128) 449 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 450 } 451 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,qmax)452 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) { 453 TEST_REQUIRES_ARM_NEON; 454 GemmMicrokernelTester() 455 .mr(4) 456 .nr(8) 457 .kr(1) 458 .sr(1) 459 .m(4) 460 .n(8) 461 .k(8) 462 .qmax(128) 463 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 464 } 465 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cm)466 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) { 467 TEST_REQUIRES_ARM_NEON; 468 GemmMicrokernelTester() 469 .mr(4) 470 .nr(8) 471 .kr(1) 472 .sr(1) 473 .m(4) 474 .n(8) 475 .k(8) 476 .cm_stride(11) 477 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 478 } 479 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY 480 481 482 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_eq_8)483 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8) { 484 TEST_REQUIRES_ARM_NEON; 485 GemmMicrokernelTester() 486 .mr(2) 487 .nr(8) 488 .kr(2) 489 .sr(1) 490 .m(2) 491 .n(8) 492 .k(8) 493 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 494 } 495 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,strided_cn)496 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, 
strided_cn) { 497 TEST_REQUIRES_ARM_NEON; 498 GemmMicrokernelTester() 499 .mr(2) 500 .nr(8) 501 .kr(2) 502 .sr(1) 503 .m(2) 504 .n(8) 505 .k(8) 506 .cn_stride(11) 507 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 508 } 509 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_eq_8_strided_a)510 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_strided_a) { 511 TEST_REQUIRES_ARM_NEON; 512 GemmMicrokernelTester() 513 .mr(2) 514 .nr(8) 515 .kr(2) 516 .sr(1) 517 .m(2) 518 .n(8) 519 .k(8) 520 .a_stride(11) 521 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 522 } 523 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_eq_8_subtile)524 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile) { 525 TEST_REQUIRES_ARM_NEON; 526 for (uint32_t n = 1; n <= 8; n++) { 527 for (uint32_t m = 1; m <= 2; m++) { 528 GemmMicrokernelTester() 529 .mr(2) 530 .nr(8) 531 .kr(2) 532 .sr(1) 533 .m(m) 534 .n(n) 535 .k(8) 536 .iterations(1) 537 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 538 } 539 } 540 } 541 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_eq_8_subtile_m)542 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) { 543 TEST_REQUIRES_ARM_NEON; 544 for (uint32_t m = 1; m <= 2; m++) { 545 GemmMicrokernelTester() 546 .mr(2) 547 .nr(8) 548 .kr(2) 549 .sr(1) 550 .m(m) 551 .n(8) 552 .k(8) 553 .iterations(1) 554 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 555 } 556 } 557 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_eq_8_subtile_n)558 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) { 559 TEST_REQUIRES_ARM_NEON; 560 for (uint32_t n = 1; n <= 8; n++) { 561 GemmMicrokernelTester() 562 
.mr(2) 563 .nr(8) 564 .kr(2) 565 .sr(1) 566 .m(2) 567 .n(n) 568 .k(8) 569 .iterations(1) 570 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 571 } 572 } 573 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_lt_8)574 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8) { 575 TEST_REQUIRES_ARM_NEON; 576 for (size_t k = 1; k < 8; k++) { 577 GemmMicrokernelTester() 578 .mr(2) 579 .nr(8) 580 .kr(2) 581 .sr(1) 582 .m(2) 583 .n(8) 584 .k(k) 585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 586 } 587 } 588 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_lt_8_strided_a)589 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8_strided_a) { 590 TEST_REQUIRES_ARM_NEON; 591 for (size_t k = 1; k < 8; k++) { 592 GemmMicrokernelTester() 593 .mr(2) 594 .nr(8) 595 .kr(2) 596 .sr(1) 597 .m(2) 598 .n(8) 599 .k(k) 600 .a_stride(11) 601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 602 } 603 } 604 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_lt_8_subtile)605 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8_subtile) { 606 TEST_REQUIRES_ARM_NEON; 607 for (size_t k = 1; k < 8; k++) { 608 for (uint32_t n = 1; n <= 8; n++) { 609 for (uint32_t m = 1; m <= 2; m++) { 610 GemmMicrokernelTester() 611 .mr(2) 612 .nr(8) 613 .kr(2) 614 .sr(1) 615 .m(m) 616 .n(n) 617 .k(k) 618 .iterations(1) 619 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 620 } 621 } 622 } 623 } 624 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_gt_8)625 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8) { 626 TEST_REQUIRES_ARM_NEON; 627 for (size_t k = 9; k < 16; k++) { 628 GemmMicrokernelTester() 629 .mr(2) 630 .nr(8) 631 .kr(2) 632 .sr(1) 633 .m(2) 
634 .n(8) 635 .k(k) 636 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 637 } 638 } 639 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_gt_8_strided_a)640 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8_strided_a) { 641 TEST_REQUIRES_ARM_NEON; 642 for (size_t k = 9; k < 16; k++) { 643 GemmMicrokernelTester() 644 .mr(2) 645 .nr(8) 646 .kr(2) 647 .sr(1) 648 .m(2) 649 .n(8) 650 .k(k) 651 .a_stride(19) 652 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 653 } 654 } 655 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_gt_8_subtile)656 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8_subtile) { 657 TEST_REQUIRES_ARM_NEON; 658 for (size_t k = 9; k < 16; k++) { 659 for (uint32_t n = 1; n <= 8; n++) { 660 for (uint32_t m = 1; m <= 2; m++) { 661 GemmMicrokernelTester() 662 .mr(2) 663 .nr(8) 664 .kr(2) 665 .sr(1) 666 .m(m) 667 .n(n) 668 .k(k) 669 .iterations(1) 670 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 671 } 672 } 673 } 674 } 675 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_div_8)676 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8) { 677 TEST_REQUIRES_ARM_NEON; 678 for (size_t k = 16; k <= 80; k += 8) { 679 GemmMicrokernelTester() 680 .mr(2) 681 .nr(8) 682 .kr(2) 683 .sr(1) 684 .m(2) 685 .n(8) 686 .k(k) 687 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 688 } 689 } 690 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_div_8_strided_a)691 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8_strided_a) { 692 TEST_REQUIRES_ARM_NEON; 693 for (size_t k = 16; k <= 80; k += 8) { 694 GemmMicrokernelTester() 695 .mr(2) 696 .nr(8) 697 .kr(2) 698 .sr(1) 699 .m(2) 700 .n(8) 701 .k(k) 702 
.a_stride(83) 703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 704 } 705 } 706 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_div_8_subtile)707 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8_subtile) { 708 TEST_REQUIRES_ARM_NEON; 709 for (size_t k = 16; k <= 80; k += 8) { 710 for (uint32_t n = 1; n <= 8; n++) { 711 for (uint32_t m = 1; m <= 2; m++) { 712 GemmMicrokernelTester() 713 .mr(2) 714 .nr(8) 715 .kr(2) 716 .sr(1) 717 .m(m) 718 .n(n) 719 .k(k) 720 .iterations(1) 721 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 722 } 723 } 724 } 725 } 726 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_gt_8)727 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8) { 728 TEST_REQUIRES_ARM_NEON; 729 for (uint32_t n = 9; n < 16; n++) { 730 for (size_t k = 1; k <= 40; k += 9) { 731 GemmMicrokernelTester() 732 .mr(2) 733 .nr(8) 734 .kr(2) 735 .sr(1) 736 .m(2) 737 .n(n) 738 .k(k) 739 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 740 } 741 } 742 } 743 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_gt_8_strided_cn)744 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) { 745 TEST_REQUIRES_ARM_NEON; 746 for (uint32_t n = 9; n < 16; n++) { 747 for (size_t k = 1; k <= 40; k += 9) { 748 GemmMicrokernelTester() 749 .mr(2) 750 .nr(8) 751 .kr(2) 752 .sr(1) 753 .m(2) 754 .n(n) 755 .k(k) 756 .cn_stride(11) 757 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 758 } 759 } 760 } 761 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_gt_8_strided_a)762 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_strided_a) { 763 TEST_REQUIRES_ARM_NEON; 764 for (uint32_t n = 9; n < 16; n++) { 765 for (size_t k = 1; 
k <= 40; k += 9) { 766 GemmMicrokernelTester() 767 .mr(2) 768 .nr(8) 769 .kr(2) 770 .sr(1) 771 .m(2) 772 .n(n) 773 .k(k) 774 .a_stride(43) 775 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 776 } 777 } 778 } 779 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_gt_8_subtile)780 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_subtile) { 781 TEST_REQUIRES_ARM_NEON; 782 for (uint32_t n = 9; n < 16; n++) { 783 for (size_t k = 1; k <= 40; k += 9) { 784 for (uint32_t m = 1; m <= 2; m++) { 785 GemmMicrokernelTester() 786 .mr(2) 787 .nr(8) 788 .kr(2) 789 .sr(1) 790 .m(m) 791 .n(n) 792 .k(k) 793 .iterations(1) 794 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 795 } 796 } 797 } 798 } 799 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_div_8)800 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8) { 801 TEST_REQUIRES_ARM_NEON; 802 for (uint32_t n = 16; n <= 24; n += 8) { 803 for (size_t k = 1; k <= 40; k += 9) { 804 GemmMicrokernelTester() 805 .mr(2) 806 .nr(8) 807 .kr(2) 808 .sr(1) 809 .m(2) 810 .n(n) 811 .k(k) 812 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 813 } 814 } 815 } 816 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_div_8_strided_cn)817 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) { 818 TEST_REQUIRES_ARM_NEON; 819 for (uint32_t n = 16; n <= 24; n += 8) { 820 for (size_t k = 1; k <= 40; k += 9) { 821 GemmMicrokernelTester() 822 .mr(2) 823 .nr(8) 824 .kr(2) 825 .sr(1) 826 .m(2) 827 .n(n) 828 .k(k) 829 .cn_stride(11) 830 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 831 } 832 } 833 } 834 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_div_8_strided_a)835 
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_strided_a) { 836 TEST_REQUIRES_ARM_NEON; 837 for (uint32_t n = 16; n <= 24; n += 8) { 838 for (size_t k = 1; k <= 40; k += 9) { 839 GemmMicrokernelTester() 840 .mr(2) 841 .nr(8) 842 .kr(2) 843 .sr(1) 844 .m(2) 845 .n(n) 846 .k(k) 847 .a_stride(43) 848 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 849 } 850 } 851 } 852 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_div_8_subtile)853 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_subtile) { 854 TEST_REQUIRES_ARM_NEON; 855 for (uint32_t n = 16; n <= 24; n += 8) { 856 for (size_t k = 1; k <= 40; k += 9) { 857 for (uint32_t m = 1; m <= 2; m++) { 858 GemmMicrokernelTester() 859 .mr(2) 860 .nr(8) 861 .kr(2) 862 .sr(1) 863 .m(m) 864 .n(n) 865 .k(k) 866 .iterations(1) 867 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 868 } 869 } 870 } 871 } 872 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,strided_cm_subtile)873 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm_subtile) { 874 TEST_REQUIRES_ARM_NEON; 875 for (size_t k = 1; k <= 40; k += 9) { 876 for (uint32_t n = 1; n <= 8; n++) { 877 for (uint32_t m = 1; m <= 2; m++) { 878 GemmMicrokernelTester() 879 .mr(2) 880 .nr(8) 881 .kr(2) 882 .sr(1) 883 .m(m) 884 .n(n) 885 .k(k) 886 .cm_stride(11) 887 .iterations(1) 888 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 889 } 890 } 891 } 892 } 893 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,qmin)894 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmin) { 895 TEST_REQUIRES_ARM_NEON; 896 GemmMicrokernelTester() 897 .mr(2) 898 .nr(8) 899 .kr(2) 900 .sr(1) 901 .m(2) 902 .n(8) 903 .k(8) 904 .qmin(128) 905 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 906 } 907 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,qmax)908 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmax) { 909 TEST_REQUIRES_ARM_NEON; 910 GemmMicrokernelTester() 911 .mr(2) 912 .nr(8) 913 .kr(2) 914 .sr(1) 915 .m(2) 916 .n(8) 917 .k(8) 918 .qmax(128) 919 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 920 } 921 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,strided_cm)922 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm) { 923 TEST_REQUIRES_ARM_NEON; 924 GemmMicrokernelTester() 925 .mr(2) 926 .nr(8) 927 .kr(2) 928 .sr(1) 929 .m(2) 930 .n(8) 931 .k(8) 932 .cm_stride(11) 933 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 934 } 935 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 936 937 938 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_eq_8)939 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_eq_8) { 940 TEST_REQUIRES_ARM_NEON; 941 GemmMicrokernelTester() 942 .mr(2) 943 .nr(16) 944 .kr(2) 945 .sr(1) 946 .m(2) 947 .n(16) 948 .k(8) 949 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 950 } 951 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,strided_cn)952 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, strided_cn) { 953 TEST_REQUIRES_ARM_NEON; 954 GemmMicrokernelTester() 955 .mr(2) 956 .nr(16) 957 .kr(2) 958 .sr(1) 959 .m(2) 960 .n(16) 961 .k(8) 962 .cn_stride(19) 963 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 964 } 965 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_eq_8_strided_a)966 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_eq_8_strided_a) { 967 
TEST_REQUIRES_ARM_NEON; 968 GemmMicrokernelTester() 969 .mr(2) 970 .nr(16) 971 .kr(2) 972 .sr(1) 973 .m(2) 974 .n(16) 975 .k(8) 976 .a_stride(11) 977 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 978 } 979 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_eq_8_subtile)980 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_eq_8_subtile) { 981 TEST_REQUIRES_ARM_NEON; 982 for (uint32_t n = 1; n <= 16; n++) { 983 for (uint32_t m = 1; m <= 2; m++) { 984 GemmMicrokernelTester() 985 .mr(2) 986 .nr(16) 987 .kr(2) 988 .sr(1) 989 .m(m) 990 .n(n) 991 .k(8) 992 .iterations(1) 993 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 994 } 995 } 996 } 997 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_eq_8_subtile_m)998 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_eq_8_subtile_m) { 999 TEST_REQUIRES_ARM_NEON; 1000 for (uint32_t m = 1; m <= 2; m++) { 1001 GemmMicrokernelTester() 1002 .mr(2) 1003 .nr(16) 1004 .kr(2) 1005 .sr(1) 1006 .m(m) 1007 .n(16) 1008 .k(8) 1009 .iterations(1) 1010 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1011 } 1012 } 1013 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_eq_8_subtile_n)1014 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_eq_8_subtile_n) { 1015 TEST_REQUIRES_ARM_NEON; 1016 for (uint32_t n = 1; n <= 16; n++) { 1017 GemmMicrokernelTester() 1018 .mr(2) 1019 .nr(16) 1020 .kr(2) 1021 .sr(1) 1022 .m(2) 1023 .n(n) 1024 .k(8) 1025 .iterations(1) 1026 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1027 } 1028 } 1029 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_lt_8)1030 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_lt_8) { 1031 TEST_REQUIRES_ARM_NEON; 1032 for (size_t 
k = 1; k < 8; k++) { 1033 GemmMicrokernelTester() 1034 .mr(2) 1035 .nr(16) 1036 .kr(2) 1037 .sr(1) 1038 .m(2) 1039 .n(16) 1040 .k(k) 1041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1042 } 1043 } 1044 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_lt_8_strided_a)1045 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_lt_8_strided_a) { 1046 TEST_REQUIRES_ARM_NEON; 1047 for (size_t k = 1; k < 8; k++) { 1048 GemmMicrokernelTester() 1049 .mr(2) 1050 .nr(16) 1051 .kr(2) 1052 .sr(1) 1053 .m(2) 1054 .n(16) 1055 .k(k) 1056 .a_stride(11) 1057 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1058 } 1059 } 1060 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_lt_8_subtile)1061 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_lt_8_subtile) { 1062 TEST_REQUIRES_ARM_NEON; 1063 for (size_t k = 1; k < 8; k++) { 1064 for (uint32_t n = 1; n <= 16; n++) { 1065 for (uint32_t m = 1; m <= 2; m++) { 1066 GemmMicrokernelTester() 1067 .mr(2) 1068 .nr(16) 1069 .kr(2) 1070 .sr(1) 1071 .m(m) 1072 .n(n) 1073 .k(k) 1074 .iterations(1) 1075 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1076 } 1077 } 1078 } 1079 } 1080 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_gt_8)1081 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_gt_8) { 1082 TEST_REQUIRES_ARM_NEON; 1083 for (size_t k = 9; k < 16; k++) { 1084 GemmMicrokernelTester() 1085 .mr(2) 1086 .nr(16) 1087 .kr(2) 1088 .sr(1) 1089 .m(2) 1090 .n(16) 1091 .k(k) 1092 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1093 } 1094 } 1095 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_gt_8_strided_a)1096 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_gt_8_strided_a) { 1097 
TEST_REQUIRES_ARM_NEON; 1098 for (size_t k = 9; k < 16; k++) { 1099 GemmMicrokernelTester() 1100 .mr(2) 1101 .nr(16) 1102 .kr(2) 1103 .sr(1) 1104 .m(2) 1105 .n(16) 1106 .k(k) 1107 .a_stride(19) 1108 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1109 } 1110 } 1111 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_gt_8_subtile)1112 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_gt_8_subtile) { 1113 TEST_REQUIRES_ARM_NEON; 1114 for (size_t k = 9; k < 16; k++) { 1115 for (uint32_t n = 1; n <= 16; n++) { 1116 for (uint32_t m = 1; m <= 2; m++) { 1117 GemmMicrokernelTester() 1118 .mr(2) 1119 .nr(16) 1120 .kr(2) 1121 .sr(1) 1122 .m(m) 1123 .n(n) 1124 .k(k) 1125 .iterations(1) 1126 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1127 } 1128 } 1129 } 1130 } 1131 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_div_8)1132 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_div_8) { 1133 TEST_REQUIRES_ARM_NEON; 1134 for (size_t k = 16; k <= 80; k += 8) { 1135 GemmMicrokernelTester() 1136 .mr(2) 1137 .nr(16) 1138 .kr(2) 1139 .sr(1) 1140 .m(2) 1141 .n(16) 1142 .k(k) 1143 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1144 } 1145 } 1146 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_div_8_strided_a)1147 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_div_8_strided_a) { 1148 TEST_REQUIRES_ARM_NEON; 1149 for (size_t k = 16; k <= 80; k += 8) { 1150 GemmMicrokernelTester() 1151 .mr(2) 1152 .nr(16) 1153 .kr(2) 1154 .sr(1) 1155 .m(2) 1156 .n(16) 1157 .k(k) 1158 .a_stride(83) 1159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1160 } 1161 } 1162 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_div_8_subtile)1163 
TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_div_8_subtile) { 1164 TEST_REQUIRES_ARM_NEON; 1165 for (size_t k = 16; k <= 80; k += 8) { 1166 for (uint32_t n = 1; n <= 16; n++) { 1167 for (uint32_t m = 1; m <= 2; m++) { 1168 GemmMicrokernelTester() 1169 .mr(2) 1170 .nr(16) 1171 .kr(2) 1172 .sr(1) 1173 .m(m) 1174 .n(n) 1175 .k(k) 1176 .iterations(1) 1177 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1178 } 1179 } 1180 } 1181 } 1182 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_gt_16)1183 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_gt_16) { 1184 TEST_REQUIRES_ARM_NEON; 1185 for (uint32_t n = 17; n < 32; n++) { 1186 for (size_t k = 1; k <= 40; k += 9) { 1187 GemmMicrokernelTester() 1188 .mr(2) 1189 .nr(16) 1190 .kr(2) 1191 .sr(1) 1192 .m(2) 1193 .n(n) 1194 .k(k) 1195 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1196 } 1197 } 1198 } 1199 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_gt_16_strided_cn)1200 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_gt_16_strided_cn) { 1201 TEST_REQUIRES_ARM_NEON; 1202 for (uint32_t n = 17; n < 32; n++) { 1203 for (size_t k = 1; k <= 40; k += 9) { 1204 GemmMicrokernelTester() 1205 .mr(2) 1206 .nr(16) 1207 .kr(2) 1208 .sr(1) 1209 .m(2) 1210 .n(n) 1211 .k(k) 1212 .cn_stride(19) 1213 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1214 } 1215 } 1216 } 1217 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_gt_16_strided_a)1218 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_gt_16_strided_a) { 1219 TEST_REQUIRES_ARM_NEON; 1220 for (uint32_t n = 17; n < 32; n++) { 1221 for (size_t k = 1; k <= 40; k += 9) { 1222 GemmMicrokernelTester() 1223 .mr(2) 1224 .nr(16) 1225 .kr(2) 1226 .sr(1) 1227 .m(2) 1228 .n(n) 1229 .k(k) 1230 .a_stride(43) 1231 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1232 } 1233 } 1234 } 1235 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_gt_16_subtile)1236 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_gt_16_subtile) { 1237 TEST_REQUIRES_ARM_NEON; 1238 for (uint32_t n = 17; n < 32; n++) { 1239 for (size_t k = 1; k <= 40; k += 9) { 1240 for (uint32_t m = 1; m <= 2; m++) { 1241 GemmMicrokernelTester() 1242 .mr(2) 1243 .nr(16) 1244 .kr(2) 1245 .sr(1) 1246 .m(m) 1247 .n(n) 1248 .k(k) 1249 .iterations(1) 1250 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1251 } 1252 } 1253 } 1254 } 1255 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_div_16)1256 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_div_16) { 1257 TEST_REQUIRES_ARM_NEON; 1258 for (uint32_t n = 32; n <= 48; n += 16) { 1259 for (size_t k = 1; k <= 40; k += 9) { 1260 GemmMicrokernelTester() 1261 .mr(2) 1262 .nr(16) 1263 .kr(2) 1264 .sr(1) 1265 .m(2) 1266 .n(n) 1267 .k(k) 1268 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1269 } 1270 } 1271 } 1272 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_div_16_strided_cn)1273 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_div_16_strided_cn) { 1274 TEST_REQUIRES_ARM_NEON; 1275 for (uint32_t n = 32; n <= 48; n += 16) { 1276 for (size_t k = 1; k <= 40; k += 9) { 1277 GemmMicrokernelTester() 1278 .mr(2) 1279 .nr(16) 1280 .kr(2) 1281 .sr(1) 1282 .m(2) 1283 .n(n) 1284 .k(k) 1285 .cn_stride(19) 1286 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1287 } 1288 } 1289 } 1290 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_div_16_strided_a)1291 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_div_16_strided_a) { 
1292 TEST_REQUIRES_ARM_NEON; 1293 for (uint32_t n = 32; n <= 48; n += 16) { 1294 for (size_t k = 1; k <= 40; k += 9) { 1295 GemmMicrokernelTester() 1296 .mr(2) 1297 .nr(16) 1298 .kr(2) 1299 .sr(1) 1300 .m(2) 1301 .n(n) 1302 .k(k) 1303 .a_stride(43) 1304 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1305 } 1306 } 1307 } 1308 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_div_16_subtile)1309 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_div_16_subtile) { 1310 TEST_REQUIRES_ARM_NEON; 1311 for (uint32_t n = 32; n <= 48; n += 16) { 1312 for (size_t k = 1; k <= 40; k += 9) { 1313 for (uint32_t m = 1; m <= 2; m++) { 1314 GemmMicrokernelTester() 1315 .mr(2) 1316 .nr(16) 1317 .kr(2) 1318 .sr(1) 1319 .m(m) 1320 .n(n) 1321 .k(k) 1322 .iterations(1) 1323 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1324 } 1325 } 1326 } 1327 } 1328 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,strided_cm_subtile)1329 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, strided_cm_subtile) { 1330 TEST_REQUIRES_ARM_NEON; 1331 for (size_t k = 1; k <= 40; k += 9) { 1332 for (uint32_t n = 1; n <= 16; n++) { 1333 for (uint32_t m = 1; m <= 2; m++) { 1334 GemmMicrokernelTester() 1335 .mr(2) 1336 .nr(16) 1337 .kr(2) 1338 .sr(1) 1339 .m(m) 1340 .n(n) 1341 .k(k) 1342 .cm_stride(19) 1343 .iterations(1) 1344 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1345 } 1346 } 1347 } 1348 } 1349 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,qmin)1350 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, qmin) { 1351 TEST_REQUIRES_ARM_NEON; 1352 GemmMicrokernelTester() 1353 .mr(2) 1354 .nr(16) 1355 .kr(2) 1356 .sr(1) 1357 .m(2) 1358 .n(16) 1359 .k(8) 1360 .qmin(128) 1361 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1362 } 1363 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,qmax)1364 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, qmax) { 1365 TEST_REQUIRES_ARM_NEON; 1366 GemmMicrokernelTester() 1367 .mr(2) 1368 .nr(16) 1369 .kr(2) 1370 .sr(1) 1371 .m(2) 1372 .n(16) 1373 .k(8) 1374 .qmax(128) 1375 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1376 } 1377 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,strided_cm)1378 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, strided_cm) { 1379 TEST_REQUIRES_ARM_NEON; 1380 GemmMicrokernelTester() 1381 .mr(2) 1382 .nr(16) 1383 .kr(2) 1384 .sr(1) 1385 .m(2) 1386 .n(16) 1387 .k(8) 1388 .cm_stride(19) 1389 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1390 } 1391 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1392 1393 1394 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R,k_eq_8)1395 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_eq_8) { 1396 TEST_REQUIRES_ARM_NEON; 1397 GemmMicrokernelTester() 1398 .mr(3) 1399 .nr(16) 1400 .kr(2) 1401 .sr(1) 1402 .m(3) 1403 .n(16) 1404 .k(8) 1405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1406 } 1407 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R,strided_cn)1408 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, strided_cn) { 1409 TEST_REQUIRES_ARM_NEON; 1410 GemmMicrokernelTester() 1411 .mr(3) 1412 .nr(16) 1413 .kr(2) 1414 .sr(1) 1415 .m(3) 1416 .n(16) 1417 .k(8) 1418 .cn_stride(19) 1419 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1420 } 1421 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R,k_eq_8_strided_a)1422 
// Full 3x16 tile at k = 8 with a_stride set to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(2)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .a_stride(11)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k = 8, every m in [1,3] and n in [1,16]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 8, n = 16, every m in [1,3]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 8, m = 3, every n in [1,16]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 3x16 tile for k = 1..7.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 3x16 tile for k = 1..7 with a_stride set to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .a_stride(11)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..7, every m in [1,3] and n in [1,16]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 3x16 tile for k = 9..15.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 3x16 tile for k = 9..15 with a_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 9..15, every m in [1,3] and n in [1,16]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 3x16 tile for k = 16..80 in steps of 8.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 3x16 tile for k = 16..80 in steps of 8 with a_stride set to 83.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16..80 in steps of 8, every m in [1,3] and n in [1,16]; one iteration
// each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 17..31 with m = 3, k = 1..40 in steps of 9.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with a_stride set to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31, k = 1..40 in steps of 9, every m in [1,3]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 with m = 3, k = 1..40 in steps of 9.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with a_stride set to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1..40 in steps of 9, every m in [1,3]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Every m in [1,3] and n in [1,16] for k = 1..40 in steps of 9, with
// cm_stride set to 19; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, qmin) { 1807 TEST_REQUIRES_ARM_NEON; 1808 GemmMicrokernelTester() 1809 .mr(3) 1810 .nr(16) 1811 .kr(2) 1812 .sr(1) 1813 .m(3) 1814 .n(16) 1815 .k(8) 1816 .qmin(128) 1817 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1818 } 1819 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R,qmax)1820 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, qmax) { 1821 TEST_REQUIRES_ARM_NEON; 1822 GemmMicrokernelTester() 1823 .mr(3) 1824 .nr(16) 1825 .kr(2) 1826 .sr(1) 1827 .m(3) 1828 .n(16) 1829 .k(8) 1830 .qmax(128) 1831 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1832 } 1833 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R,strided_cm)1834 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, strided_cm) { 1835 TEST_REQUIRES_ARM_NEON; 1836 GemmMicrokernelTester() 1837 .mr(3) 1838 .nr(16) 1839 .kr(2) 1840 .sr(1) 1841 .m(3) 1842 .n(16) 1843 .k(8) 1844 .cm_stride(19) 1845 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1846 } 1847 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1848 1849 1850 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16)1851 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16) { 1852 TEST_REQUIRES_ARM_NEON; 1853 GemmMicrokernelTester() 1854 .mr(3) 1855 .nr(8) 1856 .kr(2) 1857 .sr(1) 1858 .m(3) 1859 .n(8) 1860 .k(16) 1861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1862 } 1863 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,strided_cn)1864 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cn) { 1865 TEST_REQUIRES_ARM_NEON; 1866 GemmMicrokernelTester() 1867 .mr(3) 1868 .nr(8) 1869 .kr(2) 1870 .sr(1) 1871 .m(3) 1872 
.n(8) 1873 .k(16) 1874 .cn_stride(11) 1875 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1876 } 1877 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16_strided_a)1878 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_strided_a) { 1879 TEST_REQUIRES_ARM_NEON; 1880 GemmMicrokernelTester() 1881 .mr(3) 1882 .nr(8) 1883 .kr(2) 1884 .sr(1) 1885 .m(3) 1886 .n(8) 1887 .k(16) 1888 .a_stride(19) 1889 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1890 } 1891 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16_subtile)1892 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) { 1893 TEST_REQUIRES_ARM_NEON; 1894 for (uint32_t n = 1; n <= 8; n++) { 1895 for (uint32_t m = 1; m <= 3; m++) { 1896 GemmMicrokernelTester() 1897 .mr(3) 1898 .nr(8) 1899 .kr(2) 1900 .sr(1) 1901 .m(m) 1902 .n(n) 1903 .k(16) 1904 .iterations(1) 1905 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1906 } 1907 } 1908 } 1909 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16_subtile_m)1910 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 1911 TEST_REQUIRES_ARM_NEON; 1912 for (uint32_t m = 1; m <= 3; m++) { 1913 GemmMicrokernelTester() 1914 .mr(3) 1915 .nr(8) 1916 .kr(2) 1917 .sr(1) 1918 .m(m) 1919 .n(8) 1920 .k(16) 1921 .iterations(1) 1922 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1923 } 1924 } 1925 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16_subtile_n)1926 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) { 1927 TEST_REQUIRES_ARM_NEON; 1928 for (uint32_t n = 1; n <= 8; n++) { 1929 GemmMicrokernelTester() 1930 .mr(3) 1931 .nr(8) 1932 .kr(2) 1933 .sr(1) 1934 .m(3) 1935 
.n(n) 1936 .k(16) 1937 .iterations(1) 1938 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1939 } 1940 } 1941 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_lt_16)1942 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_lt_16) { 1943 TEST_REQUIRES_ARM_NEON; 1944 for (size_t k = 1; k < 16; k++) { 1945 GemmMicrokernelTester() 1946 .mr(3) 1947 .nr(8) 1948 .kr(2) 1949 .sr(1) 1950 .m(3) 1951 .n(8) 1952 .k(k) 1953 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1954 } 1955 } 1956 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_lt_16_strided_a)1957 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_lt_16_strided_a) { 1958 TEST_REQUIRES_ARM_NEON; 1959 for (size_t k = 1; k < 16; k++) { 1960 GemmMicrokernelTester() 1961 .mr(3) 1962 .nr(8) 1963 .kr(2) 1964 .sr(1) 1965 .m(3) 1966 .n(8) 1967 .k(k) 1968 .a_stride(19) 1969 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1970 } 1971 } 1972 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_lt_16_subtile)1973 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) { 1974 TEST_REQUIRES_ARM_NEON; 1975 for (size_t k = 1; k < 16; k++) { 1976 for (uint32_t n = 1; n <= 8; n++) { 1977 for (uint32_t m = 1; m <= 3; m++) { 1978 GemmMicrokernelTester() 1979 .mr(3) 1980 .nr(8) 1981 .kr(2) 1982 .sr(1) 1983 .m(m) 1984 .n(n) 1985 .k(k) 1986 .iterations(1) 1987 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1988 } 1989 } 1990 } 1991 } 1992 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_gt_16)1993 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_gt_16) { 1994 TEST_REQUIRES_ARM_NEON; 1995 for (size_t k = 17; k < 32; k++) { 1996 GemmMicrokernelTester() 1997 .mr(3) 1998 .nr(8) 1999 
.kr(2) 2000 .sr(1) 2001 .m(3) 2002 .n(8) 2003 .k(k) 2004 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2005 } 2006 } 2007 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_gt_16_strided_a)2008 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_gt_16_strided_a) { 2009 TEST_REQUIRES_ARM_NEON; 2010 for (size_t k = 17; k < 32; k++) { 2011 GemmMicrokernelTester() 2012 .mr(3) 2013 .nr(8) 2014 .kr(2) 2015 .sr(1) 2016 .m(3) 2017 .n(8) 2018 .k(k) 2019 .a_stride(37) 2020 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2021 } 2022 } 2023 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_gt_16_subtile)2024 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) { 2025 TEST_REQUIRES_ARM_NEON; 2026 for (size_t k = 17; k < 32; k++) { 2027 for (uint32_t n = 1; n <= 8; n++) { 2028 for (uint32_t m = 1; m <= 3; m++) { 2029 GemmMicrokernelTester() 2030 .mr(3) 2031 .nr(8) 2032 .kr(2) 2033 .sr(1) 2034 .m(m) 2035 .n(n) 2036 .k(k) 2037 .iterations(1) 2038 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2039 } 2040 } 2041 } 2042 } 2043 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_div_16)2044 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_div_16) { 2045 TEST_REQUIRES_ARM_NEON; 2046 for (size_t k = 32; k <= 160; k += 16) { 2047 GemmMicrokernelTester() 2048 .mr(3) 2049 .nr(8) 2050 .kr(2) 2051 .sr(1) 2052 .m(3) 2053 .n(8) 2054 .k(k) 2055 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2056 } 2057 } 2058 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_div_16_strided_a)2059 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_div_16_strided_a) { 2060 TEST_REQUIRES_ARM_NEON; 2061 for (size_t k = 32; k <= 160; k += 16) { 2062 
GemmMicrokernelTester() 2063 .mr(3) 2064 .nr(8) 2065 .kr(2) 2066 .sr(1) 2067 .m(3) 2068 .n(8) 2069 .k(k) 2070 .a_stride(163) 2071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2072 } 2073 } 2074 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_div_16_subtile)2075 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_div_16_subtile) { 2076 TEST_REQUIRES_ARM_NEON; 2077 for (size_t k = 32; k <= 160; k += 16) { 2078 for (uint32_t n = 1; n <= 8; n++) { 2079 for (uint32_t m = 1; m <= 3; m++) { 2080 GemmMicrokernelTester() 2081 .mr(3) 2082 .nr(8) 2083 .kr(2) 2084 .sr(1) 2085 .m(m) 2086 .n(n) 2087 .k(k) 2088 .iterations(1) 2089 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2090 } 2091 } 2092 } 2093 } 2094 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_gt_8)2095 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8) { 2096 TEST_REQUIRES_ARM_NEON; 2097 for (uint32_t n = 9; n < 16; n++) { 2098 for (size_t k = 1; k <= 80; k += 17) { 2099 GemmMicrokernelTester() 2100 .mr(3) 2101 .nr(8) 2102 .kr(2) 2103 .sr(1) 2104 .m(3) 2105 .n(n) 2106 .k(k) 2107 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2108 } 2109 } 2110 } 2111 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_gt_8_strided_cn)2112 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) { 2113 TEST_REQUIRES_ARM_NEON; 2114 for (uint32_t n = 9; n < 16; n++) { 2115 for (size_t k = 1; k <= 80; k += 17) { 2116 GemmMicrokernelTester() 2117 .mr(3) 2118 .nr(8) 2119 .kr(2) 2120 .sr(1) 2121 .m(3) 2122 .n(n) 2123 .k(k) 2124 .cn_stride(11) 2125 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2126 } 2127 } 2128 } 2129 
// n = 9..15, k = 1, 18, ..., 69, m = 3, with a_stride = 83.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15, k = 1, 18, ..., 69, every m in 1..3; iterations set to 1.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16 and 24 (multiples of nr = 8), k = 1, 18, ..., 69, m = 3.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride = 11.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with a_stride = 83.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 18, ..., 69, every m in 1..3; iterations set to 1.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1, 18, ..., 69, every (m, n) in 1..3 x 1..8, with cm_stride = 11; iterations set to 1.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2261 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,qmin)2262 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, qmin) { 2263 TEST_REQUIRES_ARM_NEON; 2264 GemmMicrokernelTester() 2265 .mr(3) 2266 .nr(8) 2267 .kr(2) 2268 .sr(1) 2269 .m(3) 2270 .n(8) 2271 .k(16) 2272 .qmin(128) 2273 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2274 } 2275 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,qmax)2276 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, qmax) { 2277 TEST_REQUIRES_ARM_NEON; 2278 GemmMicrokernelTester() 2279 .mr(3) 2280 .nr(8) 2281 .kr(2) 2282 .sr(1) 2283 .m(3) 2284 .n(8) 2285 .k(16) 2286 .qmax(128) 2287 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2288 } 2289 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,strided_cm)2290 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cm) { 2291 TEST_REQUIRES_ARM_NEON; 2292 GemmMicrokernelTester() 2293 .mr(3) 2294 .nr(8) 2295 .kr(2) 2296 .sr(1) 2297 .m(3) 2298 .n(8) 2299 .k(16) 2300 .cm_stride(11) 2301 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2302 } 2303 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2304 2305 2306 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16)2307 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16) { 2308 TEST_REQUIRES_ARM_NEON; 2309 GemmMicrokernelTester() 2310 .mr(4) 2311 .nr(8) 2312 .kr(2) 2313 .sr(1) 2314 .m(4) 2315 .n(8) 2316 .k(16) 2317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2318 } 2319 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,strided_cn)2320 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cn) { 2321 TEST_REQUIRES_ARM_NEON; 2322 GemmMicrokernelTester() 2323 .mr(4) 
2324 .nr(8) 2325 .kr(2) 2326 .sr(1) 2327 .m(4) 2328 .n(8) 2329 .k(16) 2330 .cn_stride(11) 2331 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2332 } 2333 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16_strided_a)2334 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_strided_a) { 2335 TEST_REQUIRES_ARM_NEON; 2336 GemmMicrokernelTester() 2337 .mr(4) 2338 .nr(8) 2339 .kr(2) 2340 .sr(1) 2341 .m(4) 2342 .n(8) 2343 .k(16) 2344 .a_stride(19) 2345 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2346 } 2347 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16_subtile)2348 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) { 2349 TEST_REQUIRES_ARM_NEON; 2350 for (uint32_t n = 1; n <= 8; n++) { 2351 for (uint32_t m = 1; m <= 4; m++) { 2352 GemmMicrokernelTester() 2353 .mr(4) 2354 .nr(8) 2355 .kr(2) 2356 .sr(1) 2357 .m(m) 2358 .n(n) 2359 .k(16) 2360 .iterations(1) 2361 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2362 } 2363 } 2364 } 2365 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16_subtile_m)2366 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 2367 TEST_REQUIRES_ARM_NEON; 2368 for (uint32_t m = 1; m <= 4; m++) { 2369 GemmMicrokernelTester() 2370 .mr(4) 2371 .nr(8) 2372 .kr(2) 2373 .sr(1) 2374 .m(m) 2375 .n(8) 2376 .k(16) 2377 .iterations(1) 2378 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2379 } 2380 } 2381 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16_subtile_n)2382 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) { 2383 TEST_REQUIRES_ARM_NEON; 2384 for (uint32_t n = 1; n <= 8; n++) { 2385 GemmMicrokernelTester() 2386 .mr(4) 
2387 .nr(8) 2388 .kr(2) 2389 .sr(1) 2390 .m(4) 2391 .n(n) 2392 .k(16) 2393 .iterations(1) 2394 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2395 } 2396 } 2397 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_lt_16)2398 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_lt_16) { 2399 TEST_REQUIRES_ARM_NEON; 2400 for (size_t k = 1; k < 16; k++) { 2401 GemmMicrokernelTester() 2402 .mr(4) 2403 .nr(8) 2404 .kr(2) 2405 .sr(1) 2406 .m(4) 2407 .n(8) 2408 .k(k) 2409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2410 } 2411 } 2412 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_lt_16_strided_a)2413 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_lt_16_strided_a) { 2414 TEST_REQUIRES_ARM_NEON; 2415 for (size_t k = 1; k < 16; k++) { 2416 GemmMicrokernelTester() 2417 .mr(4) 2418 .nr(8) 2419 .kr(2) 2420 .sr(1) 2421 .m(4) 2422 .n(8) 2423 .k(k) 2424 .a_stride(19) 2425 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2426 } 2427 } 2428 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_lt_16_subtile)2429 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) { 2430 TEST_REQUIRES_ARM_NEON; 2431 for (size_t k = 1; k < 16; k++) { 2432 for (uint32_t n = 1; n <= 8; n++) { 2433 for (uint32_t m = 1; m <= 4; m++) { 2434 GemmMicrokernelTester() 2435 .mr(4) 2436 .nr(8) 2437 .kr(2) 2438 .sr(1) 2439 .m(m) 2440 .n(n) 2441 .k(k) 2442 .iterations(1) 2443 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2444 } 2445 } 2446 } 2447 } 2448 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_gt_16)2449 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_gt_16) { 2450 TEST_REQUIRES_ARM_NEON; 2451 for (size_t k = 17; k < 32; k++) { 2452 
GemmMicrokernelTester() 2453 .mr(4) 2454 .nr(8) 2455 .kr(2) 2456 .sr(1) 2457 .m(4) 2458 .n(8) 2459 .k(k) 2460 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2461 } 2462 } 2463 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_gt_16_strided_a)2464 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_gt_16_strided_a) { 2465 TEST_REQUIRES_ARM_NEON; 2466 for (size_t k = 17; k < 32; k++) { 2467 GemmMicrokernelTester() 2468 .mr(4) 2469 .nr(8) 2470 .kr(2) 2471 .sr(1) 2472 .m(4) 2473 .n(8) 2474 .k(k) 2475 .a_stride(37) 2476 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2477 } 2478 } 2479 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_gt_16_subtile)2480 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) { 2481 TEST_REQUIRES_ARM_NEON; 2482 for (size_t k = 17; k < 32; k++) { 2483 for (uint32_t n = 1; n <= 8; n++) { 2484 for (uint32_t m = 1; m <= 4; m++) { 2485 GemmMicrokernelTester() 2486 .mr(4) 2487 .nr(8) 2488 .kr(2) 2489 .sr(1) 2490 .m(m) 2491 .n(n) 2492 .k(k) 2493 .iterations(1) 2494 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2495 } 2496 } 2497 } 2498 } 2499 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_div_16)2500 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_div_16) { 2501 TEST_REQUIRES_ARM_NEON; 2502 for (size_t k = 32; k <= 160; k += 16) { 2503 GemmMicrokernelTester() 2504 .mr(4) 2505 .nr(8) 2506 .kr(2) 2507 .sr(1) 2508 .m(4) 2509 .n(8) 2510 .k(k) 2511 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2512 } 2513 } 2514 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_div_16_strided_a)2515 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_div_16_strided_a) { 2516 TEST_REQUIRES_ARM_NEON; 
2517 for (size_t k = 32; k <= 160; k += 16) { 2518 GemmMicrokernelTester() 2519 .mr(4) 2520 .nr(8) 2521 .kr(2) 2522 .sr(1) 2523 .m(4) 2524 .n(8) 2525 .k(k) 2526 .a_stride(163) 2527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2528 } 2529 } 2530 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_div_16_subtile)2531 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_div_16_subtile) { 2532 TEST_REQUIRES_ARM_NEON; 2533 for (size_t k = 32; k <= 160; k += 16) { 2534 for (uint32_t n = 1; n <= 8; n++) { 2535 for (uint32_t m = 1; m <= 4; m++) { 2536 GemmMicrokernelTester() 2537 .mr(4) 2538 .nr(8) 2539 .kr(2) 2540 .sr(1) 2541 .m(m) 2542 .n(n) 2543 .k(k) 2544 .iterations(1) 2545 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2546 } 2547 } 2548 } 2549 } 2550 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,n_gt_8)2551 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8) { 2552 TEST_REQUIRES_ARM_NEON; 2553 for (uint32_t n = 9; n < 16; n++) { 2554 for (size_t k = 1; k <= 80; k += 17) { 2555 GemmMicrokernelTester() 2556 .mr(4) 2557 .nr(8) 2558 .kr(2) 2559 .sr(1) 2560 .m(4) 2561 .n(n) 2562 .k(k) 2563 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2564 } 2565 } 2566 } 2567 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,n_gt_8_strided_cn)2568 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) { 2569 TEST_REQUIRES_ARM_NEON; 2570 for (uint32_t n = 9; n < 16; n++) { 2571 for (size_t k = 1; k <= 80; k += 17) { 2572 GemmMicrokernelTester() 2573 .mr(4) 2574 .nr(8) 2575 .kr(2) 2576 .sr(1) 2577 .m(4) 2578 .n(n) 2579 .k(k) 2580 .cn_stride(11) 2581 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2582 } 2583 } 2584 } 2585 
// n = 9..15, k = 1, 18, ..., 69, m = 4, with a_stride = 83.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15, k = 1, 18, ..., 69, every m in 1..4; iterations set to 1.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16 and 24 (multiples of nr = 8), k = 1, 18, ..., 69, m = 4.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride = 11.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with a_stride = 83.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 18, ..., 69, every m in 1..4; iterations set to 1.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1, 18, ..., 69, every (m, n) in 1..4 x 1..8, with cm_stride = 11; iterations set to 1.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
2717 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,qmin)2718 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, qmin) { 2719 TEST_REQUIRES_ARM_NEON; 2720 GemmMicrokernelTester() 2721 .mr(4) 2722 .nr(8) 2723 .kr(2) 2724 .sr(1) 2725 .m(4) 2726 .n(8) 2727 .k(16) 2728 .qmin(128) 2729 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2730 } 2731 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,qmax)2732 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, qmax) { 2733 TEST_REQUIRES_ARM_NEON; 2734 GemmMicrokernelTester() 2735 .mr(4) 2736 .nr(8) 2737 .kr(2) 2738 .sr(1) 2739 .m(4) 2740 .n(8) 2741 .k(16) 2742 .qmax(128) 2743 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2744 } 2745 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,strided_cm)2746 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cm) { 2747 TEST_REQUIRES_ARM_NEON; 2748 GemmMicrokernelTester() 2749 .mr(4) 2750 .nr(8) 2751 .kr(2) 2752 .sr(1) 2753 .m(4) 2754 .n(8) 2755 .k(16) 2756 .cm_stride(11) 2757 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2758 } 2759 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2760 2761 2762 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_eq_8)2763 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8) { 2764 TEST_REQUIRES_ARM_NEON; 2765 GemmMicrokernelTester() 2766 .mr(4) 2767 .nr(16) 2768 .kr(2) 2769 .sr(1) 2770 .m(4) 2771 .n(16) 2772 .k(8) 2773 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2774 } 2775 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,strided_cn)2776 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cn) { 2777 TEST_REQUIRES_ARM_NEON; 2778 GemmMicrokernelTester() 2779 
.mr(4) 2780 .nr(16) 2781 .kr(2) 2782 .sr(1) 2783 .m(4) 2784 .n(16) 2785 .k(8) 2786 .cn_stride(19) 2787 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2788 } 2789 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_eq_8_strided_a)2790 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_strided_a) { 2791 TEST_REQUIRES_ARM_NEON; 2792 GemmMicrokernelTester() 2793 .mr(4) 2794 .nr(16) 2795 .kr(2) 2796 .sr(1) 2797 .m(4) 2798 .n(16) 2799 .k(8) 2800 .a_stride(11) 2801 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2802 } 2803 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_eq_8_subtile)2804 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile) { 2805 TEST_REQUIRES_ARM_NEON; 2806 for (uint32_t n = 1; n <= 16; n++) { 2807 for (uint32_t m = 1; m <= 4; m++) { 2808 GemmMicrokernelTester() 2809 .mr(4) 2810 .nr(16) 2811 .kr(2) 2812 .sr(1) 2813 .m(m) 2814 .n(n) 2815 .k(8) 2816 .iterations(1) 2817 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2818 } 2819 } 2820 } 2821 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_eq_8_subtile_m)2822 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) { 2823 TEST_REQUIRES_ARM_NEON; 2824 for (uint32_t m = 1; m <= 4; m++) { 2825 GemmMicrokernelTester() 2826 .mr(4) 2827 .nr(16) 2828 .kr(2) 2829 .sr(1) 2830 .m(m) 2831 .n(16) 2832 .k(8) 2833 .iterations(1) 2834 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2835 } 2836 } 2837 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_eq_8_subtile_n)2838 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) { 2839 TEST_REQUIRES_ARM_NEON; 2840 for (uint32_t n = 1; n <= 16; n++) { 2841 GemmMicrokernelTester() 
2842 .mr(4) 2843 .nr(16) 2844 .kr(2) 2845 .sr(1) 2846 .m(4) 2847 .n(n) 2848 .k(8) 2849 .iterations(1) 2850 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2851 } 2852 } 2853 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_lt_8)2854 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_lt_8) { 2855 TEST_REQUIRES_ARM_NEON; 2856 for (size_t k = 1; k < 8; k++) { 2857 GemmMicrokernelTester() 2858 .mr(4) 2859 .nr(16) 2860 .kr(2) 2861 .sr(1) 2862 .m(4) 2863 .n(16) 2864 .k(k) 2865 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2866 } 2867 } 2868 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_lt_8_strided_a)2869 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_lt_8_strided_a) { 2870 TEST_REQUIRES_ARM_NEON; 2871 for (size_t k = 1; k < 8; k++) { 2872 GemmMicrokernelTester() 2873 .mr(4) 2874 .nr(16) 2875 .kr(2) 2876 .sr(1) 2877 .m(4) 2878 .n(16) 2879 .k(k) 2880 .a_stride(11) 2881 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2882 } 2883 } 2884 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_lt_8_subtile)2885 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_lt_8_subtile) { 2886 TEST_REQUIRES_ARM_NEON; 2887 for (size_t k = 1; k < 8; k++) { 2888 for (uint32_t n = 1; n <= 16; n++) { 2889 for (uint32_t m = 1; m <= 4; m++) { 2890 GemmMicrokernelTester() 2891 .mr(4) 2892 .nr(16) 2893 .kr(2) 2894 .sr(1) 2895 .m(m) 2896 .n(n) 2897 .k(k) 2898 .iterations(1) 2899 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2900 } 2901 } 2902 } 2903 } 2904 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_gt_8)2905 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_gt_8) { 2906 TEST_REQUIRES_ARM_NEON; 2907 for (size_t k = 9; k < 16; 
k++) { 2908 GemmMicrokernelTester() 2909 .mr(4) 2910 .nr(16) 2911 .kr(2) 2912 .sr(1) 2913 .m(4) 2914 .n(16) 2915 .k(k) 2916 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2917 } 2918 } 2919 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_gt_8_strided_a)2920 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_gt_8_strided_a) { 2921 TEST_REQUIRES_ARM_NEON; 2922 for (size_t k = 9; k < 16; k++) { 2923 GemmMicrokernelTester() 2924 .mr(4) 2925 .nr(16) 2926 .kr(2) 2927 .sr(1) 2928 .m(4) 2929 .n(16) 2930 .k(k) 2931 .a_stride(19) 2932 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2933 } 2934 } 2935 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_gt_8_subtile)2936 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_gt_8_subtile) { 2937 TEST_REQUIRES_ARM_NEON; 2938 for (size_t k = 9; k < 16; k++) { 2939 for (uint32_t n = 1; n <= 16; n++) { 2940 for (uint32_t m = 1; m <= 4; m++) { 2941 GemmMicrokernelTester() 2942 .mr(4) 2943 .nr(16) 2944 .kr(2) 2945 .sr(1) 2946 .m(m) 2947 .n(n) 2948 .k(k) 2949 .iterations(1) 2950 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2951 } 2952 } 2953 } 2954 } 2955 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_div_8)2956 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_div_8) { 2957 TEST_REQUIRES_ARM_NEON; 2958 for (size_t k = 16; k <= 80; k += 8) { 2959 GemmMicrokernelTester() 2960 .mr(4) 2961 .nr(16) 2962 .kr(2) 2963 .sr(1) 2964 .m(4) 2965 .n(16) 2966 .k(k) 2967 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2968 } 2969 } 2970 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_div_8_strided_a)2971 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_div_8_strided_a) { 2972 
TEST_REQUIRES_ARM_NEON; 2973 for (size_t k = 16; k <= 80; k += 8) { 2974 GemmMicrokernelTester() 2975 .mr(4) 2976 .nr(16) 2977 .kr(2) 2978 .sr(1) 2979 .m(4) 2980 .n(16) 2981 .k(k) 2982 .a_stride(83) 2983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2984 } 2985 } 2986 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_div_8_subtile)2987 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_div_8_subtile) { 2988 TEST_REQUIRES_ARM_NEON; 2989 for (size_t k = 16; k <= 80; k += 8) { 2990 for (uint32_t n = 1; n <= 16; n++) { 2991 for (uint32_t m = 1; m <= 4; m++) { 2992 GemmMicrokernelTester() 2993 .mr(4) 2994 .nr(16) 2995 .kr(2) 2996 .sr(1) 2997 .m(m) 2998 .n(n) 2999 .k(k) 3000 .iterations(1) 3001 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3002 } 3003 } 3004 } 3005 } 3006 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,n_gt_16)3007 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16) { 3008 TEST_REQUIRES_ARM_NEON; 3009 for (uint32_t n = 17; n < 32; n++) { 3010 for (size_t k = 1; k <= 40; k += 9) { 3011 GemmMicrokernelTester() 3012 .mr(4) 3013 .nr(16) 3014 .kr(2) 3015 .sr(1) 3016 .m(4) 3017 .n(n) 3018 .k(k) 3019 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3020 } 3021 } 3022 } 3023 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,n_gt_16_strided_cn)3024 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) { 3025 TEST_REQUIRES_ARM_NEON; 3026 for (uint32_t n = 17; n < 32; n++) { 3027 for (size_t k = 1; k <= 40; k += 9) { 3028 GemmMicrokernelTester() 3029 .mr(4) 3030 .nr(16) 3031 .kr(2) 3032 .sr(1) 3033 .m(4) 3034 .n(n) 3035 .k(k) 3036 .cn_stride(19) 3037 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 3038 } 3039 } 3040 } 3041 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,n_gt_16_strided_a)3042 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_strided_a) { 3043 TEST_REQUIRES_ARM_NEON; 3044 for (uint32_t n = 17; n < 32; n++) { 3045 for (size_t k = 1; k <= 40; k += 9) { 3046 GemmMicrokernelTester() 3047 .mr(4) 3048 .nr(16) 3049 .kr(2) 3050 .sr(1) 3051 .m(4) 3052 .n(n) 3053 .k(k) 3054 .a_stride(43) 3055 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3056 } 3057 } 3058 } 3059 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,n_gt_16_subtile)3060 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_subtile) { 3061 TEST_REQUIRES_ARM_NEON; 3062 for (uint32_t n = 17; n < 32; n++) { 3063 for (size_t k = 1; k <= 40; k += 9) { 3064 for (uint32_t m = 1; m <= 4; m++) { 3065 GemmMicrokernelTester() 3066 .mr(4) 3067 .nr(16) 3068 .kr(2) 3069 .sr(1) 3070 .m(m) 3071 .n(n) 3072 .k(k) 3073 .iterations(1) 3074 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3075 } 3076 } 3077 } 3078 } 3079 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,n_div_16)3080 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16) { 3081 TEST_REQUIRES_ARM_NEON; 3082 for (uint32_t n = 32; n <= 48; n += 16) { 3083 for (size_t k = 1; k <= 40; k += 9) { 3084 GemmMicrokernelTester() 3085 .mr(4) 3086 .nr(16) 3087 .kr(2) 3088 .sr(1) 3089 .m(4) 3090 .n(n) 3091 .k(k) 3092 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3093 } 3094 } 3095 } 3096 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,n_div_16_strided_cn)3097 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) { 3098 TEST_REQUIRES_ARM_NEON; 3099 for (uint32_t n = 32; n <= 48; n += 16) { 3100 for (size_t k = 1; k <= 40; k += 9) { 
3101 GemmMicrokernelTester() 3102 .mr(4) 3103 .nr(16) 3104 .kr(2) 3105 .sr(1) 3106 .m(4) 3107 .n(n) 3108 .k(k) 3109 .cn_stride(19) 3110 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3111 } 3112 } 3113 } 3114 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,n_div_16_strided_a)3115 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_strided_a) { 3116 TEST_REQUIRES_ARM_NEON; 3117 for (uint32_t n = 32; n <= 48; n += 16) { 3118 for (size_t k = 1; k <= 40; k += 9) { 3119 GemmMicrokernelTester() 3120 .mr(4) 3121 .nr(16) 3122 .kr(2) 3123 .sr(1) 3124 .m(4) 3125 .n(n) 3126 .k(k) 3127 .a_stride(43) 3128 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3129 } 3130 } 3131 } 3132 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,n_div_16_subtile)3133 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_subtile) { 3134 TEST_REQUIRES_ARM_NEON; 3135 for (uint32_t n = 32; n <= 48; n += 16) { 3136 for (size_t k = 1; k <= 40; k += 9) { 3137 for (uint32_t m = 1; m <= 4; m++) { 3138 GemmMicrokernelTester() 3139 .mr(4) 3140 .nr(16) 3141 .kr(2) 3142 .sr(1) 3143 .m(m) 3144 .n(n) 3145 .k(k) 3146 .iterations(1) 3147 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3148 } 3149 } 3150 } 3151 } 3152 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,strided_cm_subtile)3153 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cm_subtile) { 3154 TEST_REQUIRES_ARM_NEON; 3155 for (size_t k = 1; k <= 40; k += 9) { 3156 for (uint32_t n = 1; n <= 16; n++) { 3157 for (uint32_t m = 1; m <= 4; m++) { 3158 GemmMicrokernelTester() 3159 .mr(4) 3160 .nr(16) 3161 .kr(2) 3162 .sr(1) 3163 .m(m) 3164 .n(n) 3165 .k(k) 3166 .cm_stride(19) 3167 .iterations(1) 3168 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3169 } 3170 } 3171 } 3172 } 3173 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,qmin)3174 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, qmin) { 3175 TEST_REQUIRES_ARM_NEON; 3176 GemmMicrokernelTester() 3177 .mr(4) 3178 .nr(16) 3179 .kr(2) 3180 .sr(1) 3181 .m(4) 3182 .n(16) 3183 .k(8) 3184 .qmin(128) 3185 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3186 } 3187 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,qmax)3188 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, qmax) { 3189 TEST_REQUIRES_ARM_NEON; 3190 GemmMicrokernelTester() 3191 .mr(4) 3192 .nr(16) 3193 .kr(2) 3194 .sr(1) 3195 .m(4) 3196 .n(16) 3197 .k(8) 3198 .qmax(128) 3199 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3200 } 3201 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,strided_cm)3202 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cm) { 3203 TEST_REQUIRES_ARM_NEON; 3204 GemmMicrokernelTester() 3205 .mr(4) 3206 .nr(16) 3207 .kr(2) 3208 .sr(1) 3209 .m(4) 3210 .n(16) 3211 .k(8) 3212 .cm_stride(19) 3213 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3214 } 3215 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3216 3217 3218 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_eq_16)3219 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16) { 3220 TEST_REQUIRES_ARM_NEON; 3221 GemmMicrokernelTester() 3222 .mr(1) 3223 .nr(16) 3224 .kr(2) 3225 .sr(1) 3226 .m(1) 3227 .n(16) 3228 .k(16) 3229 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3230 } 3231 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,strided_cn)3232 
// Full 1x16 tile at k = 16 with cn_stride = 19.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(2)
    .sr(1)
    .m(1)
    .n(16)
    .k(16)
    .cn_stride(19)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full 1x16 tile at k = 16 with a_stride = 19.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(2)
    .sr(1)
    .m(1)
    .n(16)
    .k(16)
    .a_stride(19)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k = 16, every n in 1..16; the m loop covers only m = 1 because mr = 1. iterations set to 1.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 16, n = 16; the m loop covers only m = 1. iterations set to 1.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(m)
      .n(16)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
// 1x16c2 NEON MLAL LD2R microkernel, k <= 16 cases. Every case configures the
// tester with mr=1, nr=16, kr=2, sr=1.

// k_eq_16_subtile_n: m=1, k=16, every n in [1, 16], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_lt_16: m=1, n=16, every k in [1, 16).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_lt_16_strided_a: as k_lt_16, with a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_lt_16_subtile: every k in [1, 16), n in [1, 16], m in [1, 1], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
// 1x16c2 NEON MLAL LD2R microkernel: k > 16, k multiple of 16, and n > 16
// cases. Every case configures the tester with mr=1, nr=16, kr=2, sr=1.

// k_gt_16: m=1, n=16, every k in [17, 32).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_gt_16_strided_a: as k_gt_16, with a_stride(37).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_stride(37)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_gt_16_subtile: every k in [17, 32), n in [1, 16], m in [1, 1], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k_div_16: m=1, n=16, k = 32, 48, ..., 160.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_div_16_strided_a: as k_div_16, with a_stride(163).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_stride(163)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_div_16_subtile: k = 32, 48, ..., 160; n in [1, 16]; m in [1, 1]; iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_gt_16: m=1, every n in [17, 32), k = 1, 18, 35, 52, 69.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_16_strided_cn: as n_gt_16, with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_16_strided_a: as n_gt_16, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_16_subtile: as n_gt_16, also sweeping m in [1, 1], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_div_16: m=1, n = 32 and 48, k = 1, 18, 35, 52, 69.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) { 3554 TEST_REQUIRES_ARM_NEON; 3555 for (uint32_t n = 32; n <= 48; n += 16) { 3556 for (size_t k = 1; k <= 80; k += 17) { 3557 GemmMicrokernelTester() 3558 .mr(1) 3559 .nr(16) 3560 .kr(2) 3561 .sr(1) 3562 .m(1) 3563 .n(n) 3564 .k(k) 3565 .cn_stride(19) 3566 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3567 } 3568 } 3569 } 3570 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,n_div_16_strided_a)3571 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_strided_a) { 3572 TEST_REQUIRES_ARM_NEON; 3573 for (uint32_t n = 32; n <= 48; n += 16) { 3574 for (size_t k = 1; k <= 80; k += 17) { 3575 GemmMicrokernelTester() 3576 .mr(1) 3577 .nr(16) 3578 .kr(2) 3579 .sr(1) 3580 .m(1) 3581 .n(n) 3582 .k(k) 3583 .a_stride(83) 3584 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3585 } 3586 } 3587 } 3588 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,n_div_16_subtile)3589 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_subtile) { 3590 TEST_REQUIRES_ARM_NEON; 3591 for (uint32_t n = 32; n <= 48; n += 16) { 3592 for (size_t k = 1; k <= 80; k += 17) { 3593 for (uint32_t m = 1; m <= 1; m++) { 3594 GemmMicrokernelTester() 3595 .mr(1) 3596 .nr(16) 3597 .kr(2) 3598 .sr(1) 3599 .m(m) 3600 .n(n) 3601 .k(k) 3602 .iterations(1) 3603 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3604 } 3605 } 3606 } 3607 } 3608 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,strided_cm_subtile)3609 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cm_subtile) { 3610 TEST_REQUIRES_ARM_NEON; 3611 for (size_t k = 1; k <= 80; k += 17) { 3612 for (uint32_t n = 1; n <= 16; n++) { 3613 for (uint32_t m = 1; m <= 1; m++) { 3614 GemmMicrokernelTester() 3615 
.mr(1) 3616 .nr(16) 3617 .kr(2) 3618 .sr(1) 3619 .m(m) 3620 .n(n) 3621 .k(k) 3622 .cm_stride(19) 3623 .iterations(1) 3624 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3625 } 3626 } 3627 } 3628 } 3629 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,qmin)3630 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, qmin) { 3631 TEST_REQUIRES_ARM_NEON; 3632 GemmMicrokernelTester() 3633 .mr(1) 3634 .nr(16) 3635 .kr(2) 3636 .sr(1) 3637 .m(1) 3638 .n(16) 3639 .k(16) 3640 .qmin(128) 3641 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3642 } 3643 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,qmax)3644 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, qmax) { 3645 TEST_REQUIRES_ARM_NEON; 3646 GemmMicrokernelTester() 3647 .mr(1) 3648 .nr(16) 3649 .kr(2) 3650 .sr(1) 3651 .m(1) 3652 .n(16) 3653 .k(16) 3654 .qmax(128) 3655 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3656 } 3657 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,strided_cm)3658 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cm) { 3659 TEST_REQUIRES_ARM_NEON; 3660 GemmMicrokernelTester() 3661 .mr(1) 3662 .nr(16) 3663 .kr(2) 3664 .sr(1) 3665 .m(1) 3666 .n(16) 3667 .k(16) 3668 .cm_stride(19) 3669 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3670 } 3671 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3672 3673 3674 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16)3675 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16) { 3676 TEST_REQUIRES_ARM_NEON; 3677 GemmMicrokernelTester() 3678 .mr(3) 3679 .nr(16) 3680 .kr(2) 3681 .sr(1) 3682 .m(3) 3683 .n(16) 3684 .k(16) 3685 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3686 } 3687 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,strided_cn)3688 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cn) { 3689 TEST_REQUIRES_ARM_NEON; 3690 GemmMicrokernelTester() 3691 .mr(3) 3692 .nr(16) 3693 .kr(2) 3694 .sr(1) 3695 .m(3) 3696 .n(16) 3697 .k(16) 3698 .cn_stride(19) 3699 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3700 } 3701 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_strided_a)3702 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_strided_a) { 3703 TEST_REQUIRES_ARM_NEON; 3704 GemmMicrokernelTester() 3705 .mr(3) 3706 .nr(16) 3707 .kr(2) 3708 .sr(1) 3709 .m(3) 3710 .n(16) 3711 .k(16) 3712 .a_stride(19) 3713 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3714 } 3715 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_subtile)3716 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) { 3717 TEST_REQUIRES_ARM_NEON; 3718 for (uint32_t n = 1; n <= 16; n++) { 3719 for (uint32_t m = 1; m <= 3; m++) { 3720 GemmMicrokernelTester() 3721 .mr(3) 3722 .nr(16) 3723 .kr(2) 3724 .sr(1) 3725 .m(m) 3726 .n(n) 3727 .k(16) 3728 .iterations(1) 3729 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3730 } 3731 } 3732 } 3733 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_subtile_m)3734 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 3735 TEST_REQUIRES_ARM_NEON; 3736 for (uint32_t m = 1; m <= 3; m++) { 3737 GemmMicrokernelTester() 3738 .mr(3) 3739 .nr(16) 3740 .kr(2) 3741 .sr(1) 3742 .m(m) 3743 .n(16) 3744 .k(16) 3745 .iterations(1) 3746 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3747 } 3748 } 3749 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_subtile_n)3750 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 3751 TEST_REQUIRES_ARM_NEON; 3752 for (uint32_t n = 1; n <= 16; n++) { 3753 GemmMicrokernelTester() 3754 .mr(3) 3755 .nr(16) 3756 .kr(2) 3757 .sr(1) 3758 .m(3) 3759 .n(n) 3760 .k(16) 3761 .iterations(1) 3762 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3763 } 3764 } 3765 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_lt_16)3766 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_lt_16) { 3767 TEST_REQUIRES_ARM_NEON; 3768 for (size_t k = 1; k < 16; k++) { 3769 GemmMicrokernelTester() 3770 .mr(3) 3771 .nr(16) 3772 .kr(2) 3773 .sr(1) 3774 .m(3) 3775 .n(16) 3776 .k(k) 3777 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3778 } 3779 } 3780 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_lt_16_strided_a)3781 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_lt_16_strided_a) { 3782 TEST_REQUIRES_ARM_NEON; 3783 for (size_t k = 1; k < 16; k++) { 3784 GemmMicrokernelTester() 3785 .mr(3) 3786 .nr(16) 3787 .kr(2) 3788 .sr(1) 3789 .m(3) 3790 .n(16) 3791 .k(k) 3792 .a_stride(19) 3793 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3794 } 3795 } 3796 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_lt_16_subtile)3797 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) { 3798 TEST_REQUIRES_ARM_NEON; 3799 for (size_t k = 1; k < 16; k++) { 3800 for (uint32_t n = 1; n <= 16; n++) { 3801 for (uint32_t m = 1; m <= 3; m++) { 3802 GemmMicrokernelTester() 3803 .mr(3) 3804 .nr(16) 3805 .kr(2) 3806 .sr(1) 
3807 .m(m) 3808 .n(n) 3809 .k(k) 3810 .iterations(1) 3811 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3812 } 3813 } 3814 } 3815 } 3816 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_gt_16)3817 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_gt_16) { 3818 TEST_REQUIRES_ARM_NEON; 3819 for (size_t k = 17; k < 32; k++) { 3820 GemmMicrokernelTester() 3821 .mr(3) 3822 .nr(16) 3823 .kr(2) 3824 .sr(1) 3825 .m(3) 3826 .n(16) 3827 .k(k) 3828 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3829 } 3830 } 3831 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_gt_16_strided_a)3832 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_gt_16_strided_a) { 3833 TEST_REQUIRES_ARM_NEON; 3834 for (size_t k = 17; k < 32; k++) { 3835 GemmMicrokernelTester() 3836 .mr(3) 3837 .nr(16) 3838 .kr(2) 3839 .sr(1) 3840 .m(3) 3841 .n(16) 3842 .k(k) 3843 .a_stride(37) 3844 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3845 } 3846 } 3847 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_gt_16_subtile)3848 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) { 3849 TEST_REQUIRES_ARM_NEON; 3850 for (size_t k = 17; k < 32; k++) { 3851 for (uint32_t n = 1; n <= 16; n++) { 3852 for (uint32_t m = 1; m <= 3; m++) { 3853 GemmMicrokernelTester() 3854 .mr(3) 3855 .nr(16) 3856 .kr(2) 3857 .sr(1) 3858 .m(m) 3859 .n(n) 3860 .k(k) 3861 .iterations(1) 3862 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3863 } 3864 } 3865 } 3866 } 3867 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_div_16)3868 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_div_16) { 3869 TEST_REQUIRES_ARM_NEON; 3870 for (size_t k = 32; k <= 160; k += 16) { 3871 
GemmMicrokernelTester() 3872 .mr(3) 3873 .nr(16) 3874 .kr(2) 3875 .sr(1) 3876 .m(3) 3877 .n(16) 3878 .k(k) 3879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3880 } 3881 } 3882 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_div_16_strided_a)3883 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_div_16_strided_a) { 3884 TEST_REQUIRES_ARM_NEON; 3885 for (size_t k = 32; k <= 160; k += 16) { 3886 GemmMicrokernelTester() 3887 .mr(3) 3888 .nr(16) 3889 .kr(2) 3890 .sr(1) 3891 .m(3) 3892 .n(16) 3893 .k(k) 3894 .a_stride(163) 3895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3896 } 3897 } 3898 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_div_16_subtile)3899 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_div_16_subtile) { 3900 TEST_REQUIRES_ARM_NEON; 3901 for (size_t k = 32; k <= 160; k += 16) { 3902 for (uint32_t n = 1; n <= 16; n++) { 3903 for (uint32_t m = 1; m <= 3; m++) { 3904 GemmMicrokernelTester() 3905 .mr(3) 3906 .nr(16) 3907 .kr(2) 3908 .sr(1) 3909 .m(m) 3910 .n(n) 3911 .k(k) 3912 .iterations(1) 3913 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3914 } 3915 } 3916 } 3917 } 3918 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_gt_16)3919 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16) { 3920 TEST_REQUIRES_ARM_NEON; 3921 for (uint32_t n = 17; n < 32; n++) { 3922 for (size_t k = 1; k <= 80; k += 17) { 3923 GemmMicrokernelTester() 3924 .mr(3) 3925 .nr(16) 3926 .kr(2) 3927 .sr(1) 3928 .m(3) 3929 .n(n) 3930 .k(k) 3931 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3932 } 3933 } 3934 } 3935 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_gt_16_strided_cn)3936 
// 3x16c2 NEON MLAL LD2R microkernel, n > 16 and n multiple of 16 cases. Every
// case configures the tester with mr=3, nr=16, kr=2, sr=1 and sweeps
// k = 1, 18, 35, 52, 69.

// n_gt_16_strided_cn: m=3, every n in [17, 32), cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_16_strided_a: m=3, every n in [17, 32), a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_16_subtile: every n in [17, 32) and m in [1, 3], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_div_16: m=3, n = 32 and 48.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_16_strided_cn: as n_div_16, with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_16_strided_a: as n_div_16, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_16_subtile: n = 32 and 48, every m in [1, 3], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cm_subtile) { 4066 TEST_REQUIRES_ARM_NEON; 4067 for (size_t k = 1; k <= 80; k += 17) { 4068 for (uint32_t n = 1; n <= 16; n++) { 4069 for (uint32_t m = 1; m <= 3; m++) { 4070 GemmMicrokernelTester() 4071 .mr(3) 4072 .nr(16) 4073 .kr(2) 4074 .sr(1) 4075 .m(m) 4076 .n(n) 4077 .k(k) 4078 .cm_stride(19) 4079 .iterations(1) 4080 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4081 } 4082 } 4083 } 4084 } 4085 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,qmin)4086 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, qmin) { 4087 TEST_REQUIRES_ARM_NEON; 4088 GemmMicrokernelTester() 4089 .mr(3) 4090 .nr(16) 4091 .kr(2) 4092 .sr(1) 4093 .m(3) 4094 .n(16) 4095 .k(16) 4096 .qmin(128) 4097 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4098 } 4099 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,qmax)4100 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, qmax) { 4101 TEST_REQUIRES_ARM_NEON; 4102 GemmMicrokernelTester() 4103 .mr(3) 4104 .nr(16) 4105 .kr(2) 4106 .sr(1) 4107 .m(3) 4108 .n(16) 4109 .k(16) 4110 .qmax(128) 4111 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4112 } 4113 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,strided_cm)4114 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cm) { 4115 TEST_REQUIRES_ARM_NEON; 4116 GemmMicrokernelTester() 4117 .mr(3) 4118 .nr(16) 4119 .kr(2) 4120 .sr(1) 4121 .m(3) 4122 .n(16) 4123 .k(16) 4124 .cm_stride(19) 4125 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4126 } 4127 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 4128 4129 4130 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 
// Cases for the 1x8c2 NEON MULL LD4R microkernel: k = 8, k < 8 and k > 8.
// Every case configures the tester with mr=1, nr=8, kr=2, sr=1.

// k_eq_8: m=1, n=8, k=8 with no extra strides.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// strided_cn: m=1, n=8, k=8 with cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k_eq_8_strided_a: m=1, n=8, k=8 with a_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .a_stride(11)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k_eq_8_subtile: k=8, every n in [1, 8] and m in [1, 1], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k_eq_8_subtile_m: n=8, k=8, every m in [1, 1], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_eq_8_subtile_n: m=1, k=8, every n in [1, 8], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_lt_8: m=1, n=8, every k in [1, 8).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_lt_8_strided_a: as k_lt_8, with a_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_lt_8_subtile: every k in [1, 8), n in [1, 8], m in [1, 1], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k_gt_8: m=1, n=8, every k in [9, 16).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_gt_8_strided_a: as k_gt_8, with a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_gt_8_subtile: every k in [9, 16), n in [1, 8], m in [1, 1], iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
// k swept over 16..80 in steps of 8, with m=1 and n=8.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// Same k sweep as k_div_8, additionally setting a_stride to 83.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(kc)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k swept over 16..80 in steps of 8, for every n in 1..8 and m in 1..1,
// with a single iteration per configuration.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in 9..15, k in 1..40 stepping by 9, with m=1.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, additionally setting cn_stride to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, additionally setting a_stride to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 9..15, k in 1..40 stepping by 9, m in 1..1; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in {16, 24}, k in 1..40 stepping by 9, with m=1.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, additionally setting cn_stride to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, additionally setting a_stride to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(kc)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k in 1..40 stepping by 9, m in 1..1; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,strided_cm_subtile)4521 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cm_subtile) { 4522 TEST_REQUIRES_ARM_NEON; 4523 for (size_t k = 1; k <= 40; k += 9) { 4524 for (uint32_t n = 1; n <= 8; n++) { 4525 for (uint32_t m = 1; m <= 1; m++) { 4526 GemmMicrokernelTester() 4527 .mr(1) 4528 .nr(8) 4529 .kr(2) 4530 .sr(1) 4531 .m(m) 4532 .n(n) 4533 .k(k) 4534 .cm_stride(11) 4535 .iterations(1) 4536 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4537 } 4538 } 4539 } 4540 } 4541 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,qmin)4542 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, qmin) { 4543 TEST_REQUIRES_ARM_NEON; 4544 GemmMicrokernelTester() 4545 .mr(1) 4546 .nr(8) 4547 .kr(2) 4548 .sr(1) 4549 .m(1) 4550 .n(8) 4551 .k(8) 4552 .qmin(128) 4553 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4554 } 4555 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,qmax)4556 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, qmax) { 4557 TEST_REQUIRES_ARM_NEON; 4558 GemmMicrokernelTester() 4559 .mr(1) 4560 .nr(8) 4561 .kr(2) 4562 .sr(1) 4563 .m(1) 4564 .n(8) 4565 .k(8) 4566 .qmax(128) 4567 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4568 } 4569 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,strided_cm)4570 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cm) { 4571 TEST_REQUIRES_ARM_NEON; 4572 GemmMicrokernelTester() 4573 .mr(1) 4574 .nr(8) 4575 .kr(2) 4576 .sr(1) 4577 .m(1) 4578 .n(8) 4579 .k(8) 4580 .cm_stride(11) 4581 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4582 } 4583 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 4584 4585 4586 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_eq_16)4587 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16) { 4588 TEST_REQUIRES_ARM_NEON; 4589 GemmMicrokernelTester() 4590 .mr(1) 4591 .nr(16) 4592 .kr(2) 4593 .sr(1) 4594 .m(1) 4595 .n(16) 4596 .k(16) 4597 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4598 } 4599 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,strided_cn)4600 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cn) { 4601 TEST_REQUIRES_ARM_NEON; 4602 GemmMicrokernelTester() 4603 .mr(1) 4604 .nr(16) 4605 .kr(2) 4606 .sr(1) 4607 .m(1) 4608 .n(16) 4609 .k(16) 4610 .cn_stride(19) 4611 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4612 } 4613 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_eq_16_strided_a)4614 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_strided_a) { 4615 TEST_REQUIRES_ARM_NEON; 4616 GemmMicrokernelTester() 4617 .mr(1) 4618 .nr(16) 4619 .kr(2) 4620 .sr(1) 4621 .m(1) 4622 .n(16) 4623 .k(16) 4624 .a_stride(19) 4625 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4626 } 4627 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_eq_16_subtile)4628 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) { 4629 TEST_REQUIRES_ARM_NEON; 4630 for (uint32_t n = 1; n <= 16; n++) { 4631 for (uint32_t m = 1; m <= 1; m++) { 4632 GemmMicrokernelTester() 4633 .mr(1) 4634 .nr(16) 4635 .kr(2) 4636 .sr(1) 4637 .m(m) 4638 .n(n) 4639 .k(16) 4640 .iterations(1) 4641 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4642 } 4643 } 4644 } 4645 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_eq_16_subtile_m)4646 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, 
k_eq_16_subtile_m) { 4647 TEST_REQUIRES_ARM_NEON; 4648 for (uint32_t m = 1; m <= 1; m++) { 4649 GemmMicrokernelTester() 4650 .mr(1) 4651 .nr(16) 4652 .kr(2) 4653 .sr(1) 4654 .m(m) 4655 .n(16) 4656 .k(16) 4657 .iterations(1) 4658 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4659 } 4660 } 4661 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_eq_16_subtile_n)4662 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) { 4663 TEST_REQUIRES_ARM_NEON; 4664 for (uint32_t n = 1; n <= 16; n++) { 4665 GemmMicrokernelTester() 4666 .mr(1) 4667 .nr(16) 4668 .kr(2) 4669 .sr(1) 4670 .m(1) 4671 .n(n) 4672 .k(16) 4673 .iterations(1) 4674 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4675 } 4676 } 4677 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_lt_16)4678 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16) { 4679 TEST_REQUIRES_ARM_NEON; 4680 for (size_t k = 1; k < 16; k++) { 4681 GemmMicrokernelTester() 4682 .mr(1) 4683 .nr(16) 4684 .kr(2) 4685 .sr(1) 4686 .m(1) 4687 .n(16) 4688 .k(k) 4689 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4690 } 4691 } 4692 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_lt_16_strided_a)4693 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16_strided_a) { 4694 TEST_REQUIRES_ARM_NEON; 4695 for (size_t k = 1; k < 16; k++) { 4696 GemmMicrokernelTester() 4697 .mr(1) 4698 .nr(16) 4699 .kr(2) 4700 .sr(1) 4701 .m(1) 4702 .n(16) 4703 .k(k) 4704 .a_stride(19) 4705 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4706 } 4707 } 4708 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_lt_16_subtile)4709 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) { 
4710 TEST_REQUIRES_ARM_NEON; 4711 for (size_t k = 1; k < 16; k++) { 4712 for (uint32_t n = 1; n <= 16; n++) { 4713 for (uint32_t m = 1; m <= 1; m++) { 4714 GemmMicrokernelTester() 4715 .mr(1) 4716 .nr(16) 4717 .kr(2) 4718 .sr(1) 4719 .m(m) 4720 .n(n) 4721 .k(k) 4722 .iterations(1) 4723 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4724 } 4725 } 4726 } 4727 } 4728 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_gt_16)4729 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16) { 4730 TEST_REQUIRES_ARM_NEON; 4731 for (size_t k = 17; k < 32; k++) { 4732 GemmMicrokernelTester() 4733 .mr(1) 4734 .nr(16) 4735 .kr(2) 4736 .sr(1) 4737 .m(1) 4738 .n(16) 4739 .k(k) 4740 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4741 } 4742 } 4743 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_gt_16_strided_a)4744 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16_strided_a) { 4745 TEST_REQUIRES_ARM_NEON; 4746 for (size_t k = 17; k < 32; k++) { 4747 GemmMicrokernelTester() 4748 .mr(1) 4749 .nr(16) 4750 .kr(2) 4751 .sr(1) 4752 .m(1) 4753 .n(16) 4754 .k(k) 4755 .a_stride(37) 4756 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4757 } 4758 } 4759 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_gt_16_subtile)4760 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) { 4761 TEST_REQUIRES_ARM_NEON; 4762 for (size_t k = 17; k < 32; k++) { 4763 for (uint32_t n = 1; n <= 16; n++) { 4764 for (uint32_t m = 1; m <= 1; m++) { 4765 GemmMicrokernelTester() 4766 .mr(1) 4767 .nr(16) 4768 .kr(2) 4769 .sr(1) 4770 .m(m) 4771 .n(n) 4772 .k(k) 4773 .iterations(1) 4774 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4775 } 4776 
} 4777 } 4778 } 4779 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_div_16)4780 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16) { 4781 TEST_REQUIRES_ARM_NEON; 4782 for (size_t k = 32; k <= 160; k += 16) { 4783 GemmMicrokernelTester() 4784 .mr(1) 4785 .nr(16) 4786 .kr(2) 4787 .sr(1) 4788 .m(1) 4789 .n(16) 4790 .k(k) 4791 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4792 } 4793 } 4794 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_div_16_strided_a)4795 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16_strided_a) { 4796 TEST_REQUIRES_ARM_NEON; 4797 for (size_t k = 32; k <= 160; k += 16) { 4798 GemmMicrokernelTester() 4799 .mr(1) 4800 .nr(16) 4801 .kr(2) 4802 .sr(1) 4803 .m(1) 4804 .n(16) 4805 .k(k) 4806 .a_stride(163) 4807 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4808 } 4809 } 4810 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_div_16_subtile)4811 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16_subtile) { 4812 TEST_REQUIRES_ARM_NEON; 4813 for (size_t k = 32; k <= 160; k += 16) { 4814 for (uint32_t n = 1; n <= 16; n++) { 4815 for (uint32_t m = 1; m <= 1; m++) { 4816 GemmMicrokernelTester() 4817 .mr(1) 4818 .nr(16) 4819 .kr(2) 4820 .sr(1) 4821 .m(m) 4822 .n(n) 4823 .k(k) 4824 .iterations(1) 4825 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4826 } 4827 } 4828 } 4829 } 4830 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_gt_16)4831 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16) { 4832 TEST_REQUIRES_ARM_NEON; 4833 for (uint32_t n = 17; n < 32; n++) { 4834 for (size_t k = 1; k <= 80; k += 17) { 4835 GemmMicrokernelTester() 4836 .mr(1) 4837 .nr(16) 4838 .kr(2) 4839 .sr(1) 4840 .m(1) 4841 .n(n) 4842 .k(k) 4843 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4844 } 4845 } 4846 } 4847 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_gt_16_strided_cn)4848 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) { 4849 TEST_REQUIRES_ARM_NEON; 4850 for (uint32_t n = 17; n < 32; n++) { 4851 for (size_t k = 1; k <= 80; k += 17) { 4852 GemmMicrokernelTester() 4853 .mr(1) 4854 .nr(16) 4855 .kr(2) 4856 .sr(1) 4857 .m(1) 4858 .n(n) 4859 .k(k) 4860 .cn_stride(19) 4861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4862 } 4863 } 4864 } 4865 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_gt_16_strided_a)4866 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_strided_a) { 4867 TEST_REQUIRES_ARM_NEON; 4868 for (uint32_t n = 17; n < 32; n++) { 4869 for (size_t k = 1; k <= 80; k += 17) { 4870 GemmMicrokernelTester() 4871 .mr(1) 4872 .nr(16) 4873 .kr(2) 4874 .sr(1) 4875 .m(1) 4876 .n(n) 4877 .k(k) 4878 .a_stride(83) 4879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4880 } 4881 } 4882 } 4883 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_gt_16_subtile)4884 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) { 4885 TEST_REQUIRES_ARM_NEON; 4886 for (uint32_t n = 17; n < 32; n++) { 4887 for (size_t k = 1; k <= 80; k += 17) { 4888 for (uint32_t m = 1; m <= 1; m++) { 4889 GemmMicrokernelTester() 4890 .mr(1) 4891 .nr(16) 4892 .kr(2) 4893 .sr(1) 4894 .m(m) 4895 .n(n) 4896 .k(k) 4897 .iterations(1) 4898 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4899 } 4900 } 4901 } 4902 } 4903 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_div_16)4904 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, 
n_div_16) { 4905 TEST_REQUIRES_ARM_NEON; 4906 for (uint32_t n = 32; n <= 48; n += 16) { 4907 for (size_t k = 1; k <= 80; k += 17) { 4908 GemmMicrokernelTester() 4909 .mr(1) 4910 .nr(16) 4911 .kr(2) 4912 .sr(1) 4913 .m(1) 4914 .n(n) 4915 .k(k) 4916 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4917 } 4918 } 4919 } 4920 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_div_16_strided_cn)4921 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) { 4922 TEST_REQUIRES_ARM_NEON; 4923 for (uint32_t n = 32; n <= 48; n += 16) { 4924 for (size_t k = 1; k <= 80; k += 17) { 4925 GemmMicrokernelTester() 4926 .mr(1) 4927 .nr(16) 4928 .kr(2) 4929 .sr(1) 4930 .m(1) 4931 .n(n) 4932 .k(k) 4933 .cn_stride(19) 4934 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4935 } 4936 } 4937 } 4938 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_div_16_strided_a)4939 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_strided_a) { 4940 TEST_REQUIRES_ARM_NEON; 4941 for (uint32_t n = 32; n <= 48; n += 16) { 4942 for (size_t k = 1; k <= 80; k += 17) { 4943 GemmMicrokernelTester() 4944 .mr(1) 4945 .nr(16) 4946 .kr(2) 4947 .sr(1) 4948 .m(1) 4949 .n(n) 4950 .k(k) 4951 .a_stride(83) 4952 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4953 } 4954 } 4955 } 4956 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_div_16_subtile)4957 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_subtile) { 4958 TEST_REQUIRES_ARM_NEON; 4959 for (uint32_t n = 32; n <= 48; n += 16) { 4960 for (size_t k = 1; k <= 80; k += 17) { 4961 for (uint32_t m = 1; m <= 1; m++) { 4962 GemmMicrokernelTester() 4963 .mr(1) 4964 .nr(16) 4965 .kr(2) 4966 .sr(1) 4967 .m(m) 4968 .n(n) 4969 .k(k) 4970 .iterations(1) 4971 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4972 } 4973 } 4974 } 4975 } 4976 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,strided_cm_subtile)4977 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm_subtile) { 4978 TEST_REQUIRES_ARM_NEON; 4979 for (size_t k = 1; k <= 80; k += 17) { 4980 for (uint32_t n = 1; n <= 16; n++) { 4981 for (uint32_t m = 1; m <= 1; m++) { 4982 GemmMicrokernelTester() 4983 .mr(1) 4984 .nr(16) 4985 .kr(2) 4986 .sr(1) 4987 .m(m) 4988 .n(n) 4989 .k(k) 4990 .cm_stride(19) 4991 .iterations(1) 4992 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4993 } 4994 } 4995 } 4996 } 4997 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,qmin)4998 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmin) { 4999 TEST_REQUIRES_ARM_NEON; 5000 GemmMicrokernelTester() 5001 .mr(1) 5002 .nr(16) 5003 .kr(2) 5004 .sr(1) 5005 .m(1) 5006 .n(16) 5007 .k(16) 5008 .qmin(128) 5009 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5010 } 5011 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,qmax)5012 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmax) { 5013 TEST_REQUIRES_ARM_NEON; 5014 GemmMicrokernelTester() 5015 .mr(1) 5016 .nr(16) 5017 .kr(2) 5018 .sr(1) 5019 .m(1) 5020 .n(16) 5021 .k(16) 5022 .qmax(128) 5023 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5024 } 5025 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,strided_cm)5026 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm) { 5027 TEST_REQUIRES_ARM_NEON; 5028 GemmMicrokernelTester() 5029 .mr(1) 5030 .nr(16) 5031 .kr(2) 5032 .sr(1) 5033 .m(1) 5034 .n(16) 5035 .k(16) 5036 .cm_stride(19) 5037 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5038 } 5039 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5040 5041 5042 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_eq_8)5043 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_eq_8) { 5044 TEST_REQUIRES_ARM_NEON; 5045 GemmMicrokernelTester() 5046 .mr(2) 5047 .nr(8) 5048 .kr(4) 5049 .sr(2) 5050 .m(2) 5051 .n(8) 5052 .k(8) 5053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5054 } 5055 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,strided_cn)5056 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, strided_cn) { 5057 TEST_REQUIRES_ARM_NEON; 5058 GemmMicrokernelTester() 5059 .mr(2) 5060 .nr(8) 5061 .kr(4) 5062 .sr(2) 5063 .m(2) 5064 .n(8) 5065 .k(8) 5066 .cn_stride(11) 5067 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5068 } 5069 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_eq_8_strided_a)5070 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_eq_8_strided_a) { 5071 TEST_REQUIRES_ARM_NEON; 5072 GemmMicrokernelTester() 5073 .mr(2) 5074 .nr(8) 5075 .kr(4) 5076 .sr(2) 5077 .m(2) 5078 .n(8) 5079 .k(8) 5080 .a_stride(11) 5081 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5082 } 5083 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_eq_8_subtile)5084 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_eq_8_subtile) { 5085 TEST_REQUIRES_ARM_NEON; 5086 for (uint32_t n = 1; n <= 8; n++) { 5087 for (uint32_t m = 1; m <= 2; m++) { 5088 GemmMicrokernelTester() 5089 .mr(2) 5090 .nr(8) 5091 .kr(4) 5092 .sr(2) 5093 .m(m) 5094 .n(n) 5095 .k(8) 5096 .iterations(1) 5097 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 5098 } 5099 } 5100 } 5101 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_eq_8_subtile_m)5102 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_eq_8_subtile_m) { 5103 TEST_REQUIRES_ARM_NEON; 5104 for (uint32_t m = 1; m <= 2; m++) { 5105 GemmMicrokernelTester() 5106 .mr(2) 5107 .nr(8) 5108 .kr(4) 5109 .sr(2) 5110 .m(m) 5111 .n(8) 5112 .k(8) 5113 .iterations(1) 5114 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5115 } 5116 } 5117 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_eq_8_subtile_n)5118 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_eq_8_subtile_n) { 5119 TEST_REQUIRES_ARM_NEON; 5120 for (uint32_t n = 1; n <= 8; n++) { 5121 GemmMicrokernelTester() 5122 .mr(2) 5123 .nr(8) 5124 .kr(4) 5125 .sr(2) 5126 .m(2) 5127 .n(n) 5128 .k(8) 5129 .iterations(1) 5130 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5131 } 5132 } 5133 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_lt_8)5134 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_lt_8) { 5135 TEST_REQUIRES_ARM_NEON; 5136 for (size_t k = 1; k < 8; k++) { 5137 GemmMicrokernelTester() 5138 .mr(2) 5139 .nr(8) 5140 .kr(4) 5141 .sr(2) 5142 .m(2) 5143 .n(8) 5144 .k(k) 5145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5146 } 5147 } 5148 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_lt_8_strided_a)5149 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_lt_8_strided_a) { 5150 TEST_REQUIRES_ARM_NEON; 5151 for (size_t k = 1; k < 8; k++) { 5152 GemmMicrokernelTester() 5153 .mr(2) 5154 .nr(8) 5155 .kr(4) 5156 .sr(2) 5157 .m(2) 5158 .n(8) 5159 .k(k) 5160 .a_stride(11) 5161 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5162 } 5163 } 5164 
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_lt_8_subtile)5165 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_lt_8_subtile) { 5166 TEST_REQUIRES_ARM_NEON; 5167 for (size_t k = 1; k < 8; k++) { 5168 for (uint32_t n = 1; n <= 8; n++) { 5169 for (uint32_t m = 1; m <= 2; m++) { 5170 GemmMicrokernelTester() 5171 .mr(2) 5172 .nr(8) 5173 .kr(4) 5174 .sr(2) 5175 .m(m) 5176 .n(n) 5177 .k(k) 5178 .iterations(1) 5179 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5180 } 5181 } 5182 } 5183 } 5184 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_gt_8)5185 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_gt_8) { 5186 TEST_REQUIRES_ARM_NEON; 5187 for (size_t k = 9; k < 16; k++) { 5188 GemmMicrokernelTester() 5189 .mr(2) 5190 .nr(8) 5191 .kr(4) 5192 .sr(2) 5193 .m(2) 5194 .n(8) 5195 .k(k) 5196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5197 } 5198 } 5199 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_gt_8_strided_a)5200 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_gt_8_strided_a) { 5201 TEST_REQUIRES_ARM_NEON; 5202 for (size_t k = 9; k < 16; k++) { 5203 GemmMicrokernelTester() 5204 .mr(2) 5205 .nr(8) 5206 .kr(4) 5207 .sr(2) 5208 .m(2) 5209 .n(8) 5210 .k(k) 5211 .a_stride(19) 5212 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5213 } 5214 } 5215 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_gt_8_subtile)5216 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_gt_8_subtile) { 5217 TEST_REQUIRES_ARM_NEON; 5218 for (size_t k = 9; k < 16; k++) { 5219 for (uint32_t n = 1; n <= 8; n++) { 5220 for (uint32_t m = 1; m <= 2; m++) { 5221 GemmMicrokernelTester() 5222 .mr(2) 5223 .nr(8) 5224 .kr(4) 5225 .sr(2) 5226 .m(m) 5227 .n(n) 5228 .k(k) 5229 .iterations(1) 5230 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5231 } 5232 } 5233 } 5234 } 5235 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_div_8)5236 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_div_8) { 5237 TEST_REQUIRES_ARM_NEON; 5238 for (size_t k = 16; k <= 80; k += 8) { 5239 GemmMicrokernelTester() 5240 .mr(2) 5241 .nr(8) 5242 .kr(4) 5243 .sr(2) 5244 .m(2) 5245 .n(8) 5246 .k(k) 5247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5248 } 5249 } 5250 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_div_8_strided_a)5251 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_div_8_strided_a) { 5252 TEST_REQUIRES_ARM_NEON; 5253 for (size_t k = 16; k <= 80; k += 8) { 5254 GemmMicrokernelTester() 5255 .mr(2) 5256 .nr(8) 5257 .kr(4) 5258 .sr(2) 5259 .m(2) 5260 .n(8) 5261 .k(k) 5262 .a_stride(83) 5263 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5264 } 5265 } 5266 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_div_8_subtile)5267 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_div_8_subtile) { 5268 TEST_REQUIRES_ARM_NEON; 5269 for (size_t k = 16; k <= 80; k += 8) { 5270 for (uint32_t n = 1; n <= 8; n++) { 5271 for (uint32_t m = 1; m <= 2; m++) { 5272 GemmMicrokernelTester() 5273 .mr(2) 5274 .nr(8) 5275 .kr(4) 5276 .sr(2) 5277 .m(m) 5278 .n(n) 5279 .k(k) 5280 .iterations(1) 5281 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5282 } 5283 } 5284 } 5285 } 5286 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,n_gt_8)5287 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_gt_8) { 5288 TEST_REQUIRES_ARM_NEON; 5289 for (uint32_t n = 9; n < 16; n++) { 5290 for (size_t k = 1; k <= 40; k += 9) { 5291 GemmMicrokernelTester() 5292 .mr(2) 5293 .nr(8) 5294 .kr(4) 5295 .sr(2) 5296 .m(2) 5297 .n(n) 5298 .k(k) 5299 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5300 } 5301 } 5302 } 5303 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,n_gt_8_strided_cn)5304 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_gt_8_strided_cn) { 5305 TEST_REQUIRES_ARM_NEON; 5306 for (uint32_t n = 9; n < 16; n++) { 5307 for (size_t k = 1; k <= 40; k += 9) { 5308 GemmMicrokernelTester() 5309 .mr(2) 5310 .nr(8) 5311 .kr(4) 5312 .sr(2) 5313 .m(2) 5314 .n(n) 5315 .k(k) 5316 .cn_stride(11) 5317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5318 } 5319 } 5320 } 5321 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,n_gt_8_strided_a)5322 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_gt_8_strided_a) { 5323 TEST_REQUIRES_ARM_NEON; 5324 for (uint32_t n = 9; n < 16; n++) { 5325 for (size_t k = 1; k <= 40; k += 9) { 5326 GemmMicrokernelTester() 5327 .mr(2) 5328 .nr(8) 5329 .kr(4) 5330 .sr(2) 5331 .m(2) 5332 .n(n) 5333 .k(k) 5334 .a_stride(43) 5335 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5336 } 5337 } 5338 } 5339 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,n_gt_8_subtile)5340 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_gt_8_subtile) { 5341 TEST_REQUIRES_ARM_NEON; 5342 for (uint32_t n = 9; n < 16; n++) { 5343 for (size_t k = 1; k <= 40; k += 9) { 5344 for (uint32_t m = 1; m <= 2; m++) { 5345 GemmMicrokernelTester() 5346 .mr(2) 5347 .nr(8) 5348 .kr(4) 5349 .sr(2) 5350 .m(m) 5351 .n(n) 5352 .k(k) 5353 .iterations(1) 5354 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5355 } 5356 } 5357 } 5358 } 5359 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,n_div_8)5360 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_div_8) { 5361 TEST_REQUIRES_ARM_NEON; 5362 for (uint32_t n = 16; n <= 
24; n += 8) { 5363 for (size_t k = 1; k <= 40; k += 9) { 5364 GemmMicrokernelTester() 5365 .mr(2) 5366 .nr(8) 5367 .kr(4) 5368 .sr(2) 5369 .m(2) 5370 .n(n) 5371 .k(k) 5372 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5373 } 5374 } 5375 } 5376 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,n_div_8_strided_cn)5377 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_div_8_strided_cn) { 5378 TEST_REQUIRES_ARM_NEON; 5379 for (uint32_t n = 16; n <= 24; n += 8) { 5380 for (size_t k = 1; k <= 40; k += 9) { 5381 GemmMicrokernelTester() 5382 .mr(2) 5383 .nr(8) 5384 .kr(4) 5385 .sr(2) 5386 .m(2) 5387 .n(n) 5388 .k(k) 5389 .cn_stride(11) 5390 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5391 } 5392 } 5393 } 5394 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,n_div_8_strided_a)5395 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_div_8_strided_a) { 5396 TEST_REQUIRES_ARM_NEON; 5397 for (uint32_t n = 16; n <= 24; n += 8) { 5398 for (size_t k = 1; k <= 40; k += 9) { 5399 GemmMicrokernelTester() 5400 .mr(2) 5401 .nr(8) 5402 .kr(4) 5403 .sr(2) 5404 .m(2) 5405 .n(n) 5406 .k(k) 5407 .a_stride(43) 5408 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5409 } 5410 } 5411 } 5412 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,n_div_8_subtile)5413 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_div_8_subtile) { 5414 TEST_REQUIRES_ARM_NEON; 5415 for (uint32_t n = 16; n <= 24; n += 8) { 5416 for (size_t k = 1; k <= 40; k += 9) { 5417 for (uint32_t m = 1; m <= 2; m++) { 5418 GemmMicrokernelTester() 5419 .mr(2) 5420 .nr(8) 5421 .kr(4) 5422 .sr(2) 5423 .m(m) 5424 .n(n) 5425 .k(k) 5426 .iterations(1) 5427 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5428 } 5429 } 
5430 } 5431 } 5432 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,strided_cm_subtile)5433 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, strided_cm_subtile) { 5434 TEST_REQUIRES_ARM_NEON; 5435 for (size_t k = 1; k <= 40; k += 9) { 5436 for (uint32_t n = 1; n <= 8; n++) { 5437 for (uint32_t m = 1; m <= 2; m++) { 5438 GemmMicrokernelTester() 5439 .mr(2) 5440 .nr(8) 5441 .kr(4) 5442 .sr(2) 5443 .m(m) 5444 .n(n) 5445 .k(k) 5446 .cm_stride(11) 5447 .iterations(1) 5448 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5449 } 5450 } 5451 } 5452 } 5453 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,qmin)5454 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, qmin) { 5455 TEST_REQUIRES_ARM_NEON; 5456 GemmMicrokernelTester() 5457 .mr(2) 5458 .nr(8) 5459 .kr(4) 5460 .sr(2) 5461 .m(2) 5462 .n(8) 5463 .k(8) 5464 .qmin(128) 5465 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5466 } 5467 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,qmax)5468 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, qmax) { 5469 TEST_REQUIRES_ARM_NEON; 5470 GemmMicrokernelTester() 5471 .mr(2) 5472 .nr(8) 5473 .kr(4) 5474 .sr(2) 5475 .m(2) 5476 .n(8) 5477 .k(8) 5478 .qmax(128) 5479 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5480 } 5481 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,strided_cm)5482 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, strided_cm) { 5483 TEST_REQUIRES_ARM_NEON; 5484 GemmMicrokernelTester() 5485 .mr(2) 5486 .nr(8) 5487 .kr(4) 5488 .sr(2) 5489 .m(2) 5490 .n(8) 5491 .k(8) 5492 .cm_stride(11) 5493 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5494 } 5495 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5496 5497 5498 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 
// Tests for xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull.
// Every case configures the tester with mr=1, nr=16, kr=4, sr=2 and is
// gated by TEST_REQUIRES_ARM_NEON.

// m, n equal to mr, nr; k fixed at 8.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(2)
    .m(1).n(16).k(8)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}

// k = 8 with cn_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(2)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}

// k = 8 with a_stride set to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(2)
    .m(1).n(16).k(8)
    .a_stride(11)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}

// k = 8; every (n, m) pair with n in [1, 16] and m in [1, 1], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(2)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 8, n = 16; m swept over [1, 1].
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(2)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k = 8, m = 1; n swept over [1, 16].
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(2)
      .m(1).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k swept over [1, 8).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(2)
      .m(1).n(16).k(k_dim)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k swept over [1, 8) with a_stride set to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(2)
      .m(1).n(16).k(k_dim)
      .a_stride(11)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k in [1, 8) crossed with n in [1, 16] and m in [1, 1], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k swept over [9, 16).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(2)
      .m(1).n(16).k(k_dim)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k swept over [9, 16) with a_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(2)
      .m(1).n(16).k(k_dim)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k in [9, 16) crossed with n in [1, 16] and m in [1, 1], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k stepped by 8 from 16 through 80.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(2)
      .m(1).n(16).k(k_dim)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k stepped by 8 from 16 through 80 with a_stride set to 83.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(2)
      .m(1).n(16).k(k_dim)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k in {16, 24, ..., 80} crossed with n in [1, 16] and m in [1, 1].
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n swept over [17, 32); for each n, k stepped by 9 from 1 through 40.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(2)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n swept over [17, 32), k stepped by 9 from 1 through 40, cn_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(2)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n swept over [17, 32), k stepped by 9 from 1 through 40, a_stride set to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(2)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [17, 32) crossed with k in {1, 10, ..., 37} and m in [1, 1],
// one iteration per combination.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n stepped by 16 from 32 through 48; k stepped by 9 from 1 through 40.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(2)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n and k sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(2)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n and k sweep as n_div_16, with a_stride set to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(2)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {32, 48} crossed with k in {1, 10, ..., 37} and m in [1, 1],
// one iteration per combination.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cm_subtile) { 5890 TEST_REQUIRES_ARM_NEON; 5891 for (size_t k = 1; k <= 40; k += 9) { 5892 for (uint32_t n = 1; n <= 16; n++) { 5893 for (uint32_t m = 1; m <= 1; m++) { 5894 GemmMicrokernelTester() 5895 .mr(1) 5896 .nr(16) 5897 .kr(4) 5898 .sr(2) 5899 .m(m) 5900 .n(n) 5901 .k(k) 5902 .cm_stride(19) 5903 .iterations(1) 5904 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5905 } 5906 } 5907 } 5908 } 5909 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,qmin)5910 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, qmin) { 5911 TEST_REQUIRES_ARM_NEON; 5912 GemmMicrokernelTester() 5913 .mr(1) 5914 .nr(16) 5915 .kr(4) 5916 .sr(2) 5917 .m(1) 5918 .n(16) 5919 .k(8) 5920 .qmin(128) 5921 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5922 } 5923 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,qmax)5924 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, qmax) { 5925 TEST_REQUIRES_ARM_NEON; 5926 GemmMicrokernelTester() 5927 .mr(1) 5928 .nr(16) 5929 .kr(4) 5930 .sr(2) 5931 .m(1) 5932 .n(16) 5933 .k(8) 5934 .qmax(128) 5935 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5936 } 5937 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,strided_cm)5938 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cm) { 5939 TEST_REQUIRES_ARM_NEON; 5940 GemmMicrokernelTester() 5941 .mr(1) 5942 .nr(16) 5943 .kr(4) 5944 .sr(2) 5945 .m(1) 5946 .n(16) 5947 .k(8) 5948 .cm_stride(19) 5949 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5950 } 5951 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5952 5953 5954 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_eq_8)5955 
// Tests for xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull.
// Every case configures the tester with mr=4, nr=16, kr=4, sr=2 and is
// gated by TEST_REQUIRES_ARM_NEON.

// m, n equal to mr, nr; k fixed at 8.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(2)
    .m(4).n(16).k(8)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}

// k = 8 with cn_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(2)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}

// k = 8 with a_stride set to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(2)
    .m(4).n(16).k(8)
    .a_stride(11)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}

// k = 8; every (n, m) pair with n in [1, 16] and m in [1, 4], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(2)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 8, n = 16; m swept over [1, 4].
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k = 8, m = 4; n swept over [1, 16].
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k swept over [1, 8).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(16).k(k_dim)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k swept over [1, 8) with a_stride set to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(16).k(k_dim)
      .a_stride(11)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k in [1, 8) crossed with n in [1, 16] and m in [1, 4], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k swept over [9, 16).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(16).k(k_dim)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k swept over [9, 16) with a_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(16).k(k_dim)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k in [9, 16) crossed with n in [1, 16] and m in [1, 4], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k stepped by 8 from 16 through 80.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(16).k(k_dim)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k stepped by 8 from 16 through 80 with a_stride set to 83.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(2)
      .m(4).n(16).k(k_dim)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k in {16, 24, ..., 80} crossed with n in [1, 16] and m in [1, 4].
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(2)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n swept over [17, 32); for each n, k stepped by 9 from 1 through 40.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(2)
        .m(4).n(n_dim).k(k_dim)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16_strided_cn) { 6217 TEST_REQUIRES_ARM_NEON; 6218 for (uint32_t n = 17; n < 32; n++) { 6219 for (size_t k = 1; k <= 40; k += 9) { 6220 GemmMicrokernelTester() 6221 .mr(4) 6222 .nr(16) 6223 .kr(4) 6224 .sr(2) 6225 .m(4) 6226 .n(n) 6227 .k(k) 6228 .cn_stride(19) 6229 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6230 } 6231 } 6232 } 6233 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_gt_16_strided_a)6234 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16_strided_a) { 6235 TEST_REQUIRES_ARM_NEON; 6236 for (uint32_t n = 17; n < 32; n++) { 6237 for (size_t k = 1; k <= 40; k += 9) { 6238 GemmMicrokernelTester() 6239 .mr(4) 6240 .nr(16) 6241 .kr(4) 6242 .sr(2) 6243 .m(4) 6244 .n(n) 6245 .k(k) 6246 .a_stride(43) 6247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6248 } 6249 } 6250 } 6251 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_gt_16_subtile)6252 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16_subtile) { 6253 TEST_REQUIRES_ARM_NEON; 6254 for (uint32_t n = 17; n < 32; n++) { 6255 for (size_t k = 1; k <= 40; k += 9) { 6256 for (uint32_t m = 1; m <= 4; m++) { 6257 GemmMicrokernelTester() 6258 .mr(4) 6259 .nr(16) 6260 .kr(4) 6261 .sr(2) 6262 .m(m) 6263 .n(n) 6264 .k(k) 6265 .iterations(1) 6266 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6267 } 6268 } 6269 } 6270 } 6271 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_div_16)6272 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16) { 6273 TEST_REQUIRES_ARM_NEON; 6274 for (uint32_t n = 32; n <= 48; n += 16) { 6275 for (size_t k = 1; k <= 40; k += 9) { 6276 GemmMicrokernelTester() 6277 .mr(4) 6278 .nr(16) 6279 .kr(4) 6280 .sr(2) 6281 .m(4) 6282 .n(n) 6283 .k(k) 6284 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6285 } 6286 } 6287 } 6288 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_div_16_strided_cn)6289 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16_strided_cn) { 6290 TEST_REQUIRES_ARM_NEON; 6291 for (uint32_t n = 32; n <= 48; n += 16) { 6292 for (size_t k = 1; k <= 40; k += 9) { 6293 GemmMicrokernelTester() 6294 .mr(4) 6295 .nr(16) 6296 .kr(4) 6297 .sr(2) 6298 .m(4) 6299 .n(n) 6300 .k(k) 6301 .cn_stride(19) 6302 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6303 } 6304 } 6305 } 6306 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_div_16_strided_a)6307 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16_strided_a) { 6308 TEST_REQUIRES_ARM_NEON; 6309 for (uint32_t n = 32; n <= 48; n += 16) { 6310 for (size_t k = 1; k <= 40; k += 9) { 6311 GemmMicrokernelTester() 6312 .mr(4) 6313 .nr(16) 6314 .kr(4) 6315 .sr(2) 6316 .m(4) 6317 .n(n) 6318 .k(k) 6319 .a_stride(43) 6320 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6321 } 6322 } 6323 } 6324 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_div_16_subtile)6325 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16_subtile) { 6326 TEST_REQUIRES_ARM_NEON; 6327 for (uint32_t n = 32; n <= 48; n += 16) { 6328 for (size_t k = 1; k <= 40; k += 9) { 6329 for (uint32_t m = 1; m <= 4; m++) { 6330 GemmMicrokernelTester() 6331 .mr(4) 6332 .nr(16) 6333 .kr(4) 6334 .sr(2) 6335 .m(m) 6336 .n(n) 6337 .k(k) 6338 .iterations(1) 6339 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6340 } 6341 } 6342 } 6343 } 6344 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,strided_cm_subtile)6345 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, 
strided_cm_subtile) { 6346 TEST_REQUIRES_ARM_NEON; 6347 for (size_t k = 1; k <= 40; k += 9) { 6348 for (uint32_t n = 1; n <= 16; n++) { 6349 for (uint32_t m = 1; m <= 4; m++) { 6350 GemmMicrokernelTester() 6351 .mr(4) 6352 .nr(16) 6353 .kr(4) 6354 .sr(2) 6355 .m(m) 6356 .n(n) 6357 .k(k) 6358 .cm_stride(19) 6359 .iterations(1) 6360 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6361 } 6362 } 6363 } 6364 } 6365 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,qmin)6366 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, qmin) { 6367 TEST_REQUIRES_ARM_NEON; 6368 GemmMicrokernelTester() 6369 .mr(4) 6370 .nr(16) 6371 .kr(4) 6372 .sr(2) 6373 .m(4) 6374 .n(16) 6375 .k(8) 6376 .qmin(128) 6377 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6378 } 6379 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,qmax)6380 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, qmax) { 6381 TEST_REQUIRES_ARM_NEON; 6382 GemmMicrokernelTester() 6383 .mr(4) 6384 .nr(16) 6385 .kr(4) 6386 .sr(2) 6387 .m(4) 6388 .n(16) 6389 .k(8) 6390 .qmax(128) 6391 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6392 } 6393 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,strided_cm)6394 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, strided_cm) { 6395 TEST_REQUIRES_ARM_NEON; 6396 GemmMicrokernelTester() 6397 .mr(4) 6398 .nr(16) 6399 .kr(4) 6400 .sr(2) 6401 .m(4) 6402 .n(16) 6403 .k(8) 6404 .cm_stride(19) 6405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6406 } 6407 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 6408 6409 6410 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16)6411 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16) { 6412 
TEST_REQUIRES_ARM_NEON; 6413 GemmMicrokernelTester() 6414 .mr(1) 6415 .nr(16) 6416 .kr(4) 6417 .sr(2) 6418 .m(1) 6419 .n(16) 6420 .k(16) 6421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6422 } 6423 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,strided_cn)6424 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cn) { 6425 TEST_REQUIRES_ARM_NEON; 6426 GemmMicrokernelTester() 6427 .mr(1) 6428 .nr(16) 6429 .kr(4) 6430 .sr(2) 6431 .m(1) 6432 .n(16) 6433 .k(16) 6434 .cn_stride(19) 6435 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6436 } 6437 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_strided_a)6438 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_strided_a) { 6439 TEST_REQUIRES_ARM_NEON; 6440 GemmMicrokernelTester() 6441 .mr(1) 6442 .nr(16) 6443 .kr(4) 6444 .sr(2) 6445 .m(1) 6446 .n(16) 6447 .k(16) 6448 .a_stride(19) 6449 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6450 } 6451 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_subtile)6452 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile) { 6453 TEST_REQUIRES_ARM_NEON; 6454 for (uint32_t n = 1; n <= 16; n++) { 6455 for (uint32_t m = 1; m <= 1; m++) { 6456 GemmMicrokernelTester() 6457 .mr(1) 6458 .nr(16) 6459 .kr(4) 6460 .sr(2) 6461 .m(m) 6462 .n(n) 6463 .k(16) 6464 .iterations(1) 6465 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6466 } 6467 } 6468 } 6469 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_subtile_m)6470 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile_m) { 6471 TEST_REQUIRES_ARM_NEON; 6472 for (uint32_t m = 1; m <= 1; m++) { 6473 GemmMicrokernelTester() 6474 .mr(1) 6475 .nr(16) 6476 .kr(4) 6477 .sr(2) 
6478 .m(m) 6479 .n(16) 6480 .k(16) 6481 .iterations(1) 6482 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6483 } 6484 } 6485 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_subtile_n)6486 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile_n) { 6487 TEST_REQUIRES_ARM_NEON; 6488 for (uint32_t n = 1; n <= 16; n++) { 6489 GemmMicrokernelTester() 6490 .mr(1) 6491 .nr(16) 6492 .kr(4) 6493 .sr(2) 6494 .m(1) 6495 .n(n) 6496 .k(16) 6497 .iterations(1) 6498 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6499 } 6500 } 6501 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_lt_16)6502 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16) { 6503 TEST_REQUIRES_ARM_NEON; 6504 for (size_t k = 1; k < 16; k++) { 6505 GemmMicrokernelTester() 6506 .mr(1) 6507 .nr(16) 6508 .kr(4) 6509 .sr(2) 6510 .m(1) 6511 .n(16) 6512 .k(k) 6513 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6514 } 6515 } 6516 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_lt_16_strided_a)6517 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16_strided_a) { 6518 TEST_REQUIRES_ARM_NEON; 6519 for (size_t k = 1; k < 16; k++) { 6520 GemmMicrokernelTester() 6521 .mr(1) 6522 .nr(16) 6523 .kr(4) 6524 .sr(2) 6525 .m(1) 6526 .n(16) 6527 .k(k) 6528 .a_stride(19) 6529 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6530 } 6531 } 6532 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_lt_16_subtile)6533 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16_subtile) { 6534 TEST_REQUIRES_ARM_NEON; 6535 for (size_t k = 1; k < 16; k++) { 6536 for (uint32_t n = 1; n <= 16; n++) { 6537 for (uint32_t m = 1; m <= 1; m++) { 6538 GemmMicrokernelTester() 6539 .mr(1) 6540 .nr(16) 
6541 .kr(4) 6542 .sr(2) 6543 .m(m) 6544 .n(n) 6545 .k(k) 6546 .iterations(1) 6547 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6548 } 6549 } 6550 } 6551 } 6552 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_gt_16)6553 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16) { 6554 TEST_REQUIRES_ARM_NEON; 6555 for (size_t k = 17; k < 32; k++) { 6556 GemmMicrokernelTester() 6557 .mr(1) 6558 .nr(16) 6559 .kr(4) 6560 .sr(2) 6561 .m(1) 6562 .n(16) 6563 .k(k) 6564 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6565 } 6566 } 6567 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_gt_16_strided_a)6568 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16_strided_a) { 6569 TEST_REQUIRES_ARM_NEON; 6570 for (size_t k = 17; k < 32; k++) { 6571 GemmMicrokernelTester() 6572 .mr(1) 6573 .nr(16) 6574 .kr(4) 6575 .sr(2) 6576 .m(1) 6577 .n(16) 6578 .k(k) 6579 .a_stride(37) 6580 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6581 } 6582 } 6583 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_gt_16_subtile)6584 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16_subtile) { 6585 TEST_REQUIRES_ARM_NEON; 6586 for (size_t k = 17; k < 32; k++) { 6587 for (uint32_t n = 1; n <= 16; n++) { 6588 for (uint32_t m = 1; m <= 1; m++) { 6589 GemmMicrokernelTester() 6590 .mr(1) 6591 .nr(16) 6592 .kr(4) 6593 .sr(2) 6594 .m(m) 6595 .n(n) 6596 .k(k) 6597 .iterations(1) 6598 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6599 } 6600 } 6601 } 6602 } 6603 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_div_16)6604 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16) { 6605 TEST_REQUIRES_ARM_NEON; 6606 for (size_t k = 32; k <= 160; k += 16) { 6607 
GemmMicrokernelTester() 6608 .mr(1) 6609 .nr(16) 6610 .kr(4) 6611 .sr(2) 6612 .m(1) 6613 .n(16) 6614 .k(k) 6615 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6616 } 6617 } 6618 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_div_16_strided_a)6619 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16_strided_a) { 6620 TEST_REQUIRES_ARM_NEON; 6621 for (size_t k = 32; k <= 160; k += 16) { 6622 GemmMicrokernelTester() 6623 .mr(1) 6624 .nr(16) 6625 .kr(4) 6626 .sr(2) 6627 .m(1) 6628 .n(16) 6629 .k(k) 6630 .a_stride(163) 6631 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6632 } 6633 } 6634 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_div_16_subtile)6635 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16_subtile) { 6636 TEST_REQUIRES_ARM_NEON; 6637 for (size_t k = 32; k <= 160; k += 16) { 6638 for (uint32_t n = 1; n <= 16; n++) { 6639 for (uint32_t m = 1; m <= 1; m++) { 6640 GemmMicrokernelTester() 6641 .mr(1) 6642 .nr(16) 6643 .kr(4) 6644 .sr(2) 6645 .m(m) 6646 .n(n) 6647 .k(k) 6648 .iterations(1) 6649 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6650 } 6651 } 6652 } 6653 } 6654 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_gt_16)6655 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16) { 6656 TEST_REQUIRES_ARM_NEON; 6657 for (uint32_t n = 17; n < 32; n++) { 6658 for (size_t k = 1; k <= 80; k += 17) { 6659 GemmMicrokernelTester() 6660 .mr(1) 6661 .nr(16) 6662 .kr(4) 6663 .sr(2) 6664 .m(1) 6665 .n(n) 6666 .k(k) 6667 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6668 } 6669 } 6670 } 6671 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_gt_16_strided_cn)6672 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, 
n_gt_16_strided_cn) { 6673 TEST_REQUIRES_ARM_NEON; 6674 for (uint32_t n = 17; n < 32; n++) { 6675 for (size_t k = 1; k <= 80; k += 17) { 6676 GemmMicrokernelTester() 6677 .mr(1) 6678 .nr(16) 6679 .kr(4) 6680 .sr(2) 6681 .m(1) 6682 .n(n) 6683 .k(k) 6684 .cn_stride(19) 6685 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6686 } 6687 } 6688 } 6689 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_gt_16_strided_a)6690 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_strided_a) { 6691 TEST_REQUIRES_ARM_NEON; 6692 for (uint32_t n = 17; n < 32; n++) { 6693 for (size_t k = 1; k <= 80; k += 17) { 6694 GemmMicrokernelTester() 6695 .mr(1) 6696 .nr(16) 6697 .kr(4) 6698 .sr(2) 6699 .m(1) 6700 .n(n) 6701 .k(k) 6702 .a_stride(83) 6703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6704 } 6705 } 6706 } 6707 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_gt_16_subtile)6708 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_subtile) { 6709 TEST_REQUIRES_ARM_NEON; 6710 for (uint32_t n = 17; n < 32; n++) { 6711 for (size_t k = 1; k <= 80; k += 17) { 6712 for (uint32_t m = 1; m <= 1; m++) { 6713 GemmMicrokernelTester() 6714 .mr(1) 6715 .nr(16) 6716 .kr(4) 6717 .sr(2) 6718 .m(m) 6719 .n(n) 6720 .k(k) 6721 .iterations(1) 6722 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6723 } 6724 } 6725 } 6726 } 6727 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_div_16)6728 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16) { 6729 TEST_REQUIRES_ARM_NEON; 6730 for (uint32_t n = 32; n <= 48; n += 16) { 6731 for (size_t k = 1; k <= 80; k += 17) { 6732 GemmMicrokernelTester() 6733 .mr(1) 6734 .nr(16) 6735 .kr(4) 6736 .sr(2) 6737 .m(1) 6738 .n(n) 6739 .k(k) 6740 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6741 } 6742 } 6743 } 6744 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_div_16_strided_cn)6745 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_strided_cn) { 6746 TEST_REQUIRES_ARM_NEON; 6747 for (uint32_t n = 32; n <= 48; n += 16) { 6748 for (size_t k = 1; k <= 80; k += 17) { 6749 GemmMicrokernelTester() 6750 .mr(1) 6751 .nr(16) 6752 .kr(4) 6753 .sr(2) 6754 .m(1) 6755 .n(n) 6756 .k(k) 6757 .cn_stride(19) 6758 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6759 } 6760 } 6761 } 6762 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_div_16_strided_a)6763 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_strided_a) { 6764 TEST_REQUIRES_ARM_NEON; 6765 for (uint32_t n = 32; n <= 48; n += 16) { 6766 for (size_t k = 1; k <= 80; k += 17) { 6767 GemmMicrokernelTester() 6768 .mr(1) 6769 .nr(16) 6770 .kr(4) 6771 .sr(2) 6772 .m(1) 6773 .n(n) 6774 .k(k) 6775 .a_stride(83) 6776 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6777 } 6778 } 6779 } 6780 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_div_16_subtile)6781 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_subtile) { 6782 TEST_REQUIRES_ARM_NEON; 6783 for (uint32_t n = 32; n <= 48; n += 16) { 6784 for (size_t k = 1; k <= 80; k += 17) { 6785 for (uint32_t m = 1; m <= 1; m++) { 6786 GemmMicrokernelTester() 6787 .mr(1) 6788 .nr(16) 6789 .kr(4) 6790 .sr(2) 6791 .m(m) 6792 .n(n) 6793 .k(k) 6794 .iterations(1) 6795 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6796 } 6797 } 6798 } 6799 } 6800 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,strided_cm_subtile)6801 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cm_subtile) { 6802 TEST_REQUIRES_ARM_NEON; 6803 for (size_t k = 
1; k <= 80; k += 17) { 6804 for (uint32_t n = 1; n <= 16; n++) { 6805 for (uint32_t m = 1; m <= 1; m++) { 6806 GemmMicrokernelTester() 6807 .mr(1) 6808 .nr(16) 6809 .kr(4) 6810 .sr(2) 6811 .m(m) 6812 .n(n) 6813 .k(k) 6814 .cm_stride(19) 6815 .iterations(1) 6816 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6817 } 6818 } 6819 } 6820 } 6821 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,qmin)6822 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, qmin) { 6823 TEST_REQUIRES_ARM_NEON; 6824 GemmMicrokernelTester() 6825 .mr(1) 6826 .nr(16) 6827 .kr(4) 6828 .sr(2) 6829 .m(1) 6830 .n(16) 6831 .k(16) 6832 .qmin(128) 6833 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6834 } 6835 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,qmax)6836 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, qmax) { 6837 TEST_REQUIRES_ARM_NEON; 6838 GemmMicrokernelTester() 6839 .mr(1) 6840 .nr(16) 6841 .kr(4) 6842 .sr(2) 6843 .m(1) 6844 .n(16) 6845 .k(16) 6846 .qmax(128) 6847 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6848 } 6849 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,strided_cm)6850 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cm) { 6851 TEST_REQUIRES_ARM_NEON; 6852 GemmMicrokernelTester() 6853 .mr(1) 6854 .nr(16) 6855 .kr(4) 6856 .sr(2) 6857 .m(1) 6858 .n(16) 6859 .k(16) 6860 .cm_stride(19) 6861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6862 } 6863 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 6864 6865 6866 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16)6867 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16) { 6868 TEST_REQUIRES_ARM_NEON; 6869 GemmMicrokernelTester() 6870 .mr(1) 6871 .nr(8) 6872 
.kr(2) 6873 .sr(4) 6874 .m(1) 6875 .n(8) 6876 .k(16) 6877 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6878 } 6879 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,strided_cn)6880 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, strided_cn) { 6881 TEST_REQUIRES_ARM_NEON; 6882 GemmMicrokernelTester() 6883 .mr(1) 6884 .nr(8) 6885 .kr(2) 6886 .sr(4) 6887 .m(1) 6888 .n(8) 6889 .k(16) 6890 .cn_stride(11) 6891 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6892 } 6893 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_strided_a)6894 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_strided_a) { 6895 TEST_REQUIRES_ARM_NEON; 6896 GemmMicrokernelTester() 6897 .mr(1) 6898 .nr(8) 6899 .kr(2) 6900 .sr(4) 6901 .m(1) 6902 .n(8) 6903 .k(16) 6904 .a_stride(19) 6905 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6906 } 6907 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_subtile)6908 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_subtile) { 6909 TEST_REQUIRES_ARM_NEON; 6910 for (uint32_t n = 1; n <= 8; n++) { 6911 for (uint32_t m = 1; m <= 1; m++) { 6912 GemmMicrokernelTester() 6913 .mr(1) 6914 .nr(8) 6915 .kr(2) 6916 .sr(4) 6917 .m(m) 6918 .n(n) 6919 .k(16) 6920 .iterations(1) 6921 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6922 } 6923 } 6924 } 6925 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_subtile_m)6926 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_subtile_m) { 6927 TEST_REQUIRES_ARM_NEON; 6928 for (uint32_t m = 1; m <= 1; m++) { 6929 GemmMicrokernelTester() 6930 .mr(1) 6931 .nr(8) 6932 .kr(2) 6933 .sr(4) 6934 .m(m) 6935 .n(8) 6936 .k(16) 6937 .iterations(1) 6938 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6939 } 6940 } 6941 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_subtile_n)6942 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_subtile_n) { 6943 TEST_REQUIRES_ARM_NEON; 6944 for (uint32_t n = 1; n <= 8; n++) { 6945 GemmMicrokernelTester() 6946 .mr(1) 6947 .nr(8) 6948 .kr(2) 6949 .sr(4) 6950 .m(1) 6951 .n(n) 6952 .k(16) 6953 .iterations(1) 6954 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6955 } 6956 } 6957 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_lt_16)6958 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_lt_16) { 6959 TEST_REQUIRES_ARM_NEON; 6960 for (size_t k = 1; k < 16; k++) { 6961 GemmMicrokernelTester() 6962 .mr(1) 6963 .nr(8) 6964 .kr(2) 6965 .sr(4) 6966 .m(1) 6967 .n(8) 6968 .k(k) 6969 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6970 } 6971 } 6972 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_lt_16_strided_a)6973 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_lt_16_strided_a) { 6974 TEST_REQUIRES_ARM_NEON; 6975 for (size_t k = 1; k < 16; k++) { 6976 GemmMicrokernelTester() 6977 .mr(1) 6978 .nr(8) 6979 .kr(2) 6980 .sr(4) 6981 .m(1) 6982 .n(8) 6983 .k(k) 6984 .a_stride(19) 6985 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6986 } 6987 } 6988 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_lt_16_subtile)6989 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_lt_16_subtile) { 6990 TEST_REQUIRES_ARM_NEON; 6991 for (size_t k = 1; k < 16; k++) { 6992 for (uint32_t n = 1; n <= 8; n++) { 6993 for (uint32_t m = 1; m <= 1; m++) { 6994 GemmMicrokernelTester() 6995 .mr(1) 6996 .nr(8) 6997 .kr(2) 6998 .sr(4) 6999 .m(m) 7000 .n(n) 7001 .k(k) 7002 .iterations(1) 7003 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7004 } 7005 } 7006 } 7007 } 7008 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_gt_16)7009 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_gt_16) { 7010 TEST_REQUIRES_ARM_NEON; 7011 for (size_t k = 17; k < 32; k++) { 7012 GemmMicrokernelTester() 7013 .mr(1) 7014 .nr(8) 7015 .kr(2) 7016 .sr(4) 7017 .m(1) 7018 .n(8) 7019 .k(k) 7020 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7021 } 7022 } 7023 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_gt_16_strided_a)7024 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_gt_16_strided_a) { 7025 TEST_REQUIRES_ARM_NEON; 7026 for (size_t k = 17; k < 32; k++) { 7027 GemmMicrokernelTester() 7028 .mr(1) 7029 .nr(8) 7030 .kr(2) 7031 .sr(4) 7032 .m(1) 7033 .n(8) 7034 .k(k) 7035 .a_stride(37) 7036 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7037 } 7038 } 7039 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_gt_16_subtile)7040 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_gt_16_subtile) { 7041 TEST_REQUIRES_ARM_NEON; 7042 for (size_t k = 17; k < 32; k++) { 7043 for (uint32_t n = 1; n <= 8; n++) { 7044 for (uint32_t m = 1; m <= 1; m++) { 7045 GemmMicrokernelTester() 7046 .mr(1) 7047 .nr(8) 7048 .kr(2) 7049 .sr(4) 7050 .m(m) 7051 .n(n) 7052 .k(k) 7053 .iterations(1) 7054 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7055 } 7056 } 7057 } 7058 } 7059 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_div_16)7060 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_div_16) { 7061 TEST_REQUIRES_ARM_NEON; 7062 for (size_t k = 32; k <= 160; k += 16) { 7063 GemmMicrokernelTester() 7064 .mr(1) 7065 .nr(8) 7066 .kr(2) 7067 .sr(4) 7068 .m(1) 7069 .n(8) 7070 .k(k) 7071 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7072 } 7073 } 7074 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_div_16_strided_a)7075 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_div_16_strided_a) { 7076 TEST_REQUIRES_ARM_NEON; 7077 for (size_t k = 32; k <= 160; k += 16) { 7078 GemmMicrokernelTester() 7079 .mr(1) 7080 .nr(8) 7081 .kr(2) 7082 .sr(4) 7083 .m(1) 7084 .n(8) 7085 .k(k) 7086 .a_stride(163) 7087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7088 } 7089 } 7090 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_div_16_subtile)7091 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_div_16_subtile) { 7092 TEST_REQUIRES_ARM_NEON; 7093 for (size_t k = 32; k <= 160; k += 16) { 7094 for (uint32_t n = 1; n <= 8; n++) { 7095 for (uint32_t m = 1; m <= 1; m++) { 7096 GemmMicrokernelTester() 7097 .mr(1) 7098 .nr(8) 7099 .kr(2) 7100 .sr(4) 7101 .m(m) 7102 .n(n) 7103 .k(k) 7104 .iterations(1) 7105 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7106 } 7107 } 7108 } 7109 } 7110 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8)7111 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8) { 7112 TEST_REQUIRES_ARM_NEON; 7113 for (uint32_t n = 9; n < 16; n++) { 7114 for (size_t k = 1; k <= 80; k += 17) { 7115 GemmMicrokernelTester() 7116 .mr(1) 7117 .nr(8) 7118 .kr(2) 7119 .sr(4) 7120 .m(1) 7121 .n(n) 7122 .k(k) 7123 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7124 } 7125 } 7126 } 7127 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8_strided_cn)7128 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8_strided_cn) { 7129 TEST_REQUIRES_ARM_NEON; 7130 for (uint32_t n = 9; n < 16; n++) { 7131 for (size_t k = 1; k <= 80; k += 17) { 7132 
GemmMicrokernelTester() 7133 .mr(1) 7134 .nr(8) 7135 .kr(2) 7136 .sr(4) 7137 .m(1) 7138 .n(n) 7139 .k(k) 7140 .cn_stride(11) 7141 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7142 } 7143 } 7144 } 7145 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8_strided_a)7146 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8_strided_a) { 7147 TEST_REQUIRES_ARM_NEON; 7148 for (uint32_t n = 9; n < 16; n++) { 7149 for (size_t k = 1; k <= 80; k += 17) { 7150 GemmMicrokernelTester() 7151 .mr(1) 7152 .nr(8) 7153 .kr(2) 7154 .sr(4) 7155 .m(1) 7156 .n(n) 7157 .k(k) 7158 .a_stride(83) 7159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7160 } 7161 } 7162 } 7163 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8_subtile)7164 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8_subtile) { 7165 TEST_REQUIRES_ARM_NEON; 7166 for (uint32_t n = 9; n < 16; n++) { 7167 for (size_t k = 1; k <= 80; k += 17) { 7168 for (uint32_t m = 1; m <= 1; m++) { 7169 GemmMicrokernelTester() 7170 .mr(1) 7171 .nr(8) 7172 .kr(2) 7173 .sr(4) 7174 .m(m) 7175 .n(n) 7176 .k(k) 7177 .iterations(1) 7178 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7179 } 7180 } 7181 } 7182 } 7183 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8)7184 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8) { 7185 TEST_REQUIRES_ARM_NEON; 7186 for (uint32_t n = 16; n <= 24; n += 8) { 7187 for (size_t k = 1; k <= 80; k += 17) { 7188 GemmMicrokernelTester() 7189 .mr(1) 7190 .nr(8) 7191 .kr(2) 7192 .sr(4) 7193 .m(1) 7194 .n(n) 7195 .k(k) 7196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7197 } 7198 } 7199 } 7200 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8_strided_cn)7201 
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8_strided_cn) { 7202 TEST_REQUIRES_ARM_NEON; 7203 for (uint32_t n = 16; n <= 24; n += 8) { 7204 for (size_t k = 1; k <= 80; k += 17) { 7205 GemmMicrokernelTester() 7206 .mr(1) 7207 .nr(8) 7208 .kr(2) 7209 .sr(4) 7210 .m(1) 7211 .n(n) 7212 .k(k) 7213 .cn_stride(11) 7214 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7215 } 7216 } 7217 } 7218 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8_strided_a)7219 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8_strided_a) { 7220 TEST_REQUIRES_ARM_NEON; 7221 for (uint32_t n = 16; n <= 24; n += 8) { 7222 for (size_t k = 1; k <= 80; k += 17) { 7223 GemmMicrokernelTester() 7224 .mr(1) 7225 .nr(8) 7226 .kr(2) 7227 .sr(4) 7228 .m(1) 7229 .n(n) 7230 .k(k) 7231 .a_stride(83) 7232 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7233 } 7234 } 7235 } 7236 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8_subtile)7237 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8_subtile) { 7238 TEST_REQUIRES_ARM_NEON; 7239 for (uint32_t n = 16; n <= 24; n += 8) { 7240 for (size_t k = 1; k <= 80; k += 17) { 7241 for (uint32_t m = 1; m <= 1; m++) { 7242 GemmMicrokernelTester() 7243 .mr(1) 7244 .nr(8) 7245 .kr(2) 7246 .sr(4) 7247 .m(m) 7248 .n(n) 7249 .k(k) 7250 .iterations(1) 7251 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7252 } 7253 } 7254 } 7255 } 7256 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,strided_cm_subtile)7257 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, strided_cm_subtile) { 7258 TEST_REQUIRES_ARM_NEON; 7259 for (size_t k = 1; k <= 80; k += 17) { 7260 for (uint32_t n = 1; n <= 8; n++) { 7261 for (uint32_t m = 1; m <= 1; m++) { 7262 GemmMicrokernelTester() 7263 .mr(1) 7264 .nr(8) 7265 .kr(2) 7266 .sr(4) 7267 
.m(m) 7268 .n(n) 7269 .k(k) 7270 .cm_stride(11) 7271 .iterations(1) 7272 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7273 } 7274 } 7275 } 7276 } 7277 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,qmin)7278 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, qmin) { 7279 TEST_REQUIRES_ARM_NEON; 7280 GemmMicrokernelTester() 7281 .mr(1) 7282 .nr(8) 7283 .kr(2) 7284 .sr(4) 7285 .m(1) 7286 .n(8) 7287 .k(16) 7288 .qmin(128) 7289 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7290 } 7291 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,qmax)7292 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, qmax) { 7293 TEST_REQUIRES_ARM_NEON; 7294 GemmMicrokernelTester() 7295 .mr(1) 7296 .nr(8) 7297 .kr(2) 7298 .sr(4) 7299 .m(1) 7300 .n(8) 7301 .k(16) 7302 .qmax(128) 7303 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7304 } 7305 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,strided_cm)7306 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, strided_cm) { 7307 TEST_REQUIRES_ARM_NEON; 7308 GemmMicrokernelTester() 7309 .mr(1) 7310 .nr(8) 7311 .kr(2) 7312 .sr(4) 7313 .m(1) 7314 .n(8) 7315 .k(16) 7316 .cm_stride(11) 7317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7318 } 7319 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7320 7321 7322 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_eq_16)7323 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16) { 7324 TEST_REQUIRES_ARM_NEON; 7325 GemmMicrokernelTester() 7326 .mr(4) 7327 .nr(8) 7328 .kr(2) 7329 .sr(4) 7330 .m(4) 7331 .n(8) 7332 .k(16) 7333 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7334 } 7335 
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,strided_cn)7336 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cn) { 7337 TEST_REQUIRES_ARM_NEON; 7338 GemmMicrokernelTester() 7339 .mr(4) 7340 .nr(8) 7341 .kr(2) 7342 .sr(4) 7343 .m(4) 7344 .n(8) 7345 .k(16) 7346 .cn_stride(11) 7347 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7348 } 7349 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_eq_16_strided_a)7350 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_strided_a) { 7351 TEST_REQUIRES_ARM_NEON; 7352 GemmMicrokernelTester() 7353 .mr(4) 7354 .nr(8) 7355 .kr(2) 7356 .sr(4) 7357 .m(4) 7358 .n(8) 7359 .k(16) 7360 .a_stride(19) 7361 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7362 } 7363 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_eq_16_subtile)7364 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile) { 7365 TEST_REQUIRES_ARM_NEON; 7366 for (uint32_t n = 1; n <= 8; n++) { 7367 for (uint32_t m = 1; m <= 4; m++) { 7368 GemmMicrokernelTester() 7369 .mr(4) 7370 .nr(8) 7371 .kr(2) 7372 .sr(4) 7373 .m(m) 7374 .n(n) 7375 .k(16) 7376 .iterations(1) 7377 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7378 } 7379 } 7380 } 7381 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_eq_16_subtile_m)7382 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_m) { 7383 TEST_REQUIRES_ARM_NEON; 7384 for (uint32_t m = 1; m <= 4; m++) { 7385 GemmMicrokernelTester() 7386 .mr(4) 7387 .nr(8) 7388 .kr(2) 7389 .sr(4) 7390 .m(m) 7391 .n(8) 7392 .k(16) 7393 .iterations(1) 7394 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7395 } 7396 } 7397 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_eq_16_subtile_n)7398 
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_n) { 7399 TEST_REQUIRES_ARM_NEON; 7400 for (uint32_t n = 1; n <= 8; n++) { 7401 GemmMicrokernelTester() 7402 .mr(4) 7403 .nr(8) 7404 .kr(2) 7405 .sr(4) 7406 .m(4) 7407 .n(n) 7408 .k(16) 7409 .iterations(1) 7410 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7411 } 7412 } 7413 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_lt_16)7414 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16) { 7415 TEST_REQUIRES_ARM_NEON; 7416 for (size_t k = 1; k < 16; k++) { 7417 GemmMicrokernelTester() 7418 .mr(4) 7419 .nr(8) 7420 .kr(2) 7421 .sr(4) 7422 .m(4) 7423 .n(8) 7424 .k(k) 7425 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7426 } 7427 } 7428 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_lt_16_strided_a)7429 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16_strided_a) { 7430 TEST_REQUIRES_ARM_NEON; 7431 for (size_t k = 1; k < 16; k++) { 7432 GemmMicrokernelTester() 7433 .mr(4) 7434 .nr(8) 7435 .kr(2) 7436 .sr(4) 7437 .m(4) 7438 .n(8) 7439 .k(k) 7440 .a_stride(19) 7441 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7442 } 7443 } 7444 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_lt_16_subtile)7445 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16_subtile) { 7446 TEST_REQUIRES_ARM_NEON; 7447 for (size_t k = 1; k < 16; k++) { 7448 for (uint32_t n = 1; n <= 8; n++) { 7449 for (uint32_t m = 1; m <= 4; m++) { 7450 GemmMicrokernelTester() 7451 .mr(4) 7452 .nr(8) 7453 .kr(2) 7454 .sr(4) 7455 .m(m) 7456 .n(n) 7457 .k(k) 7458 .iterations(1) 7459 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7460 } 7461 } 7462 } 7463 } 7464 
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_gt_16)7465 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16) { 7466 TEST_REQUIRES_ARM_NEON; 7467 for (size_t k = 17; k < 32; k++) { 7468 GemmMicrokernelTester() 7469 .mr(4) 7470 .nr(8) 7471 .kr(2) 7472 .sr(4) 7473 .m(4) 7474 .n(8) 7475 .k(k) 7476 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7477 } 7478 } 7479 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_gt_16_strided_a)7480 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16_strided_a) { 7481 TEST_REQUIRES_ARM_NEON; 7482 for (size_t k = 17; k < 32; k++) { 7483 GemmMicrokernelTester() 7484 .mr(4) 7485 .nr(8) 7486 .kr(2) 7487 .sr(4) 7488 .m(4) 7489 .n(8) 7490 .k(k) 7491 .a_stride(37) 7492 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7493 } 7494 } 7495 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_gt_16_subtile)7496 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16_subtile) { 7497 TEST_REQUIRES_ARM_NEON; 7498 for (size_t k = 17; k < 32; k++) { 7499 for (uint32_t n = 1; n <= 8; n++) { 7500 for (uint32_t m = 1; m <= 4; m++) { 7501 GemmMicrokernelTester() 7502 .mr(4) 7503 .nr(8) 7504 .kr(2) 7505 .sr(4) 7506 .m(m) 7507 .n(n) 7508 .k(k) 7509 .iterations(1) 7510 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7511 } 7512 } 7513 } 7514 } 7515 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_div_16)7516 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16) { 7517 TEST_REQUIRES_ARM_NEON; 7518 for (size_t k = 32; k <= 160; k += 16) { 7519 GemmMicrokernelTester() 7520 .mr(4) 7521 .nr(8) 7522 .kr(2) 7523 .sr(4) 7524 .m(4) 7525 .n(8) 7526 .k(k) 7527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7528 } 7529 } 7530 
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_div_16_strided_a)7531 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16_strided_a) { 7532 TEST_REQUIRES_ARM_NEON; 7533 for (size_t k = 32; k <= 160; k += 16) { 7534 GemmMicrokernelTester() 7535 .mr(4) 7536 .nr(8) 7537 .kr(2) 7538 .sr(4) 7539 .m(4) 7540 .n(8) 7541 .k(k) 7542 .a_stride(163) 7543 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7544 } 7545 } 7546 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_div_16_subtile)7547 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16_subtile) { 7548 TEST_REQUIRES_ARM_NEON; 7549 for (size_t k = 32; k <= 160; k += 16) { 7550 for (uint32_t n = 1; n <= 8; n++) { 7551 for (uint32_t m = 1; m <= 4; m++) { 7552 GemmMicrokernelTester() 7553 .mr(4) 7554 .nr(8) 7555 .kr(2) 7556 .sr(4) 7557 .m(m) 7558 .n(n) 7559 .k(k) 7560 .iterations(1) 7561 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7562 } 7563 } 7564 } 7565 } 7566 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,n_gt_8)7567 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8) { 7568 TEST_REQUIRES_ARM_NEON; 7569 for (uint32_t n = 9; n < 16; n++) { 7570 for (size_t k = 1; k <= 80; k += 17) { 7571 GemmMicrokernelTester() 7572 .mr(4) 7573 .nr(8) 7574 .kr(2) 7575 .sr(4) 7576 .m(4) 7577 .n(n) 7578 .k(k) 7579 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7580 } 7581 } 7582 } 7583 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,n_gt_8_strided_cn)7584 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_strided_cn) { 7585 TEST_REQUIRES_ARM_NEON; 7586 for (uint32_t n = 9; n < 16; n++) { 7587 for (size_t k = 1; k <= 80; k += 17) { 7588 GemmMicrokernelTester() 7589 .mr(4) 7590 .nr(8) 7591 .kr(2) 7592 .sr(4) 7593 .m(4) 7594 .n(n) 7595 .k(k) 7596 .cn_stride(11) 7597 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7598 } 7599 } 7600 } 7601 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,n_gt_8_strided_a)7602 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_strided_a) { 7603 TEST_REQUIRES_ARM_NEON; 7604 for (uint32_t n = 9; n < 16; n++) { 7605 for (size_t k = 1; k <= 80; k += 17) { 7606 GemmMicrokernelTester() 7607 .mr(4) 7608 .nr(8) 7609 .kr(2) 7610 .sr(4) 7611 .m(4) 7612 .n(n) 7613 .k(k) 7614 .a_stride(83) 7615 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7616 } 7617 } 7618 } 7619 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,n_gt_8_subtile)7620 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_subtile) { 7621 TEST_REQUIRES_ARM_NEON; 7622 for (uint32_t n = 9; n < 16; n++) { 7623 for (size_t k = 1; k <= 80; k += 17) { 7624 for (uint32_t m = 1; m <= 4; m++) { 7625 GemmMicrokernelTester() 7626 .mr(4) 7627 .nr(8) 7628 .kr(2) 7629 .sr(4) 7630 .m(m) 7631 .n(n) 7632 .k(k) 7633 .iterations(1) 7634 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7635 } 7636 } 7637 } 7638 } 7639 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,n_div_8)7640 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8) { 7641 TEST_REQUIRES_ARM_NEON; 7642 for (uint32_t n = 16; n <= 24; n += 8) { 7643 for (size_t k = 1; k <= 80; k += 17) { 7644 GemmMicrokernelTester() 7645 .mr(4) 7646 .nr(8) 7647 .kr(2) 7648 .sr(4) 7649 .m(4) 7650 .n(n) 7651 .k(k) 7652 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7653 } 7654 } 7655 } 7656 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,n_div_8_strided_cn)7657 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_strided_cn) { 7658 TEST_REQUIRES_ARM_NEON; 7659 for (uint32_t n = 16; n <= 24; n += 
8) { 7660 for (size_t k = 1; k <= 80; k += 17) { 7661 GemmMicrokernelTester() 7662 .mr(4) 7663 .nr(8) 7664 .kr(2) 7665 .sr(4) 7666 .m(4) 7667 .n(n) 7668 .k(k) 7669 .cn_stride(11) 7670 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7671 } 7672 } 7673 } 7674 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,n_div_8_strided_a)7675 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_strided_a) { 7676 TEST_REQUIRES_ARM_NEON; 7677 for (uint32_t n = 16; n <= 24; n += 8) { 7678 for (size_t k = 1; k <= 80; k += 17) { 7679 GemmMicrokernelTester() 7680 .mr(4) 7681 .nr(8) 7682 .kr(2) 7683 .sr(4) 7684 .m(4) 7685 .n(n) 7686 .k(k) 7687 .a_stride(83) 7688 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7689 } 7690 } 7691 } 7692 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,n_div_8_subtile)7693 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_subtile) { 7694 TEST_REQUIRES_ARM_NEON; 7695 for (uint32_t n = 16; n <= 24; n += 8) { 7696 for (size_t k = 1; k <= 80; k += 17) { 7697 for (uint32_t m = 1; m <= 4; m++) { 7698 GemmMicrokernelTester() 7699 .mr(4) 7700 .nr(8) 7701 .kr(2) 7702 .sr(4) 7703 .m(m) 7704 .n(n) 7705 .k(k) 7706 .iterations(1) 7707 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7708 } 7709 } 7710 } 7711 } 7712 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,strided_cm_subtile)7713 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm_subtile) { 7714 TEST_REQUIRES_ARM_NEON; 7715 for (size_t k = 1; k <= 80; k += 17) { 7716 for (uint32_t n = 1; n <= 8; n++) { 7717 for (uint32_t m = 1; m <= 4; m++) { 7718 GemmMicrokernelTester() 7719 .mr(4) 7720 .nr(8) 7721 .kr(2) 7722 .sr(4) 7723 .m(m) 7724 .n(n) 7725 .k(k) 7726 .cm_stride(11) 7727 .iterations(1) 7728 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7729 } 7730 } 7731 } 7732 } 7733 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,qmin)7734 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmin) { 7735 TEST_REQUIRES_ARM_NEON; 7736 GemmMicrokernelTester() 7737 .mr(4) 7738 .nr(8) 7739 .kr(2) 7740 .sr(4) 7741 .m(4) 7742 .n(8) 7743 .k(16) 7744 .qmin(128) 7745 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7746 } 7747 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,qmax)7748 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmax) { 7749 TEST_REQUIRES_ARM_NEON; 7750 GemmMicrokernelTester() 7751 .mr(4) 7752 .nr(8) 7753 .kr(2) 7754 .sr(4) 7755 .m(4) 7756 .n(8) 7757 .k(16) 7758 .qmax(128) 7759 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7760 } 7761 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,strided_cm)7762 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm) { 7763 TEST_REQUIRES_ARM_NEON; 7764 GemmMicrokernelTester() 7765 .mr(4) 7766 .nr(8) 7767 .kr(2) 7768 .sr(4) 7769 .m(4) 7770 .n(8) 7771 .k(16) 7772 .cm_stride(11) 7773 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7774 } 7775 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7776 7777 7778 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_eq_16)7779 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16) { 7780 TEST_REQUIRES_ARM_NEON; 7781 GemmMicrokernelTester() 7782 .mr(1) 7783 .nr(16) 7784 .kr(2) 7785 .sr(4) 7786 .m(1) 7787 .n(16) 7788 .k(16) 7789 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7790 } 7791 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,strided_cn)7792 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cn) { 7793 
TEST_REQUIRES_ARM_NEON; 7794 GemmMicrokernelTester() 7795 .mr(1) 7796 .nr(16) 7797 .kr(2) 7798 .sr(4) 7799 .m(1) 7800 .n(16) 7801 .k(16) 7802 .cn_stride(19) 7803 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7804 } 7805 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_eq_16_strided_a)7806 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_strided_a) { 7807 TEST_REQUIRES_ARM_NEON; 7808 GemmMicrokernelTester() 7809 .mr(1) 7810 .nr(16) 7811 .kr(2) 7812 .sr(4) 7813 .m(1) 7814 .n(16) 7815 .k(16) 7816 .a_stride(19) 7817 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7818 } 7819 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_eq_16_subtile)7820 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile) { 7821 TEST_REQUIRES_ARM_NEON; 7822 for (uint32_t n = 1; n <= 16; n++) { 7823 for (uint32_t m = 1; m <= 1; m++) { 7824 GemmMicrokernelTester() 7825 .mr(1) 7826 .nr(16) 7827 .kr(2) 7828 .sr(4) 7829 .m(m) 7830 .n(n) 7831 .k(16) 7832 .iterations(1) 7833 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7834 } 7835 } 7836 } 7837 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_eq_16_subtile_m)7838 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile_m) { 7839 TEST_REQUIRES_ARM_NEON; 7840 for (uint32_t m = 1; m <= 1; m++) { 7841 GemmMicrokernelTester() 7842 .mr(1) 7843 .nr(16) 7844 .kr(2) 7845 .sr(4) 7846 .m(m) 7847 .n(16) 7848 .k(16) 7849 .iterations(1) 7850 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7851 } 7852 } 7853 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_eq_16_subtile_n)7854 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile_n) { 7855 TEST_REQUIRES_ARM_NEON; 7856 for (uint32_t n = 1; n <= 16; 
n++) { 7857 GemmMicrokernelTester() 7858 .mr(1) 7859 .nr(16) 7860 .kr(2) 7861 .sr(4) 7862 .m(1) 7863 .n(n) 7864 .k(16) 7865 .iterations(1) 7866 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7867 } 7868 } 7869 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_lt_16)7870 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_lt_16) { 7871 TEST_REQUIRES_ARM_NEON; 7872 for (size_t k = 1; k < 16; k++) { 7873 GemmMicrokernelTester() 7874 .mr(1) 7875 .nr(16) 7876 .kr(2) 7877 .sr(4) 7878 .m(1) 7879 .n(16) 7880 .k(k) 7881 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7882 } 7883 } 7884 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_lt_16_strided_a)7885 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_lt_16_strided_a) { 7886 TEST_REQUIRES_ARM_NEON; 7887 for (size_t k = 1; k < 16; k++) { 7888 GemmMicrokernelTester() 7889 .mr(1) 7890 .nr(16) 7891 .kr(2) 7892 .sr(4) 7893 .m(1) 7894 .n(16) 7895 .k(k) 7896 .a_stride(19) 7897 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7898 } 7899 } 7900 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_lt_16_subtile)7901 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_lt_16_subtile) { 7902 TEST_REQUIRES_ARM_NEON; 7903 for (size_t k = 1; k < 16; k++) { 7904 for (uint32_t n = 1; n <= 16; n++) { 7905 for (uint32_t m = 1; m <= 1; m++) { 7906 GemmMicrokernelTester() 7907 .mr(1) 7908 .nr(16) 7909 .kr(2) 7910 .sr(4) 7911 .m(m) 7912 .n(n) 7913 .k(k) 7914 .iterations(1) 7915 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7916 } 7917 } 7918 } 7919 } 7920 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_gt_16)7921 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_gt_16) { 7922 TEST_REQUIRES_ARM_NEON; 7923 for (size_t k = 
17; k < 32; k++) { 7924 GemmMicrokernelTester() 7925 .mr(1) 7926 .nr(16) 7927 .kr(2) 7928 .sr(4) 7929 .m(1) 7930 .n(16) 7931 .k(k) 7932 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7933 } 7934 } 7935 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_gt_16_strided_a)7936 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_gt_16_strided_a) { 7937 TEST_REQUIRES_ARM_NEON; 7938 for (size_t k = 17; k < 32; k++) { 7939 GemmMicrokernelTester() 7940 .mr(1) 7941 .nr(16) 7942 .kr(2) 7943 .sr(4) 7944 .m(1) 7945 .n(16) 7946 .k(k) 7947 .a_stride(37) 7948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7949 } 7950 } 7951 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_gt_16_subtile)7952 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_gt_16_subtile) { 7953 TEST_REQUIRES_ARM_NEON; 7954 for (size_t k = 17; k < 32; k++) { 7955 for (uint32_t n = 1; n <= 16; n++) { 7956 for (uint32_t m = 1; m <= 1; m++) { 7957 GemmMicrokernelTester() 7958 .mr(1) 7959 .nr(16) 7960 .kr(2) 7961 .sr(4) 7962 .m(m) 7963 .n(n) 7964 .k(k) 7965 .iterations(1) 7966 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7967 } 7968 } 7969 } 7970 } 7971 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_div_16)7972 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_div_16) { 7973 TEST_REQUIRES_ARM_NEON; 7974 for (size_t k = 32; k <= 160; k += 16) { 7975 GemmMicrokernelTester() 7976 .mr(1) 7977 .nr(16) 7978 .kr(2) 7979 .sr(4) 7980 .m(1) 7981 .n(16) 7982 .k(k) 7983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7984 } 7985 } 7986 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_div_16_strided_a)7987 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_div_16_strided_a) { 7988 
TEST_REQUIRES_ARM_NEON; 7989 for (size_t k = 32; k <= 160; k += 16) { 7990 GemmMicrokernelTester() 7991 .mr(1) 7992 .nr(16) 7993 .kr(2) 7994 .sr(4) 7995 .m(1) 7996 .n(16) 7997 .k(k) 7998 .a_stride(163) 7999 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8000 } 8001 } 8002 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_div_16_subtile)8003 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_div_16_subtile) { 8004 TEST_REQUIRES_ARM_NEON; 8005 for (size_t k = 32; k <= 160; k += 16) { 8006 for (uint32_t n = 1; n <= 16; n++) { 8007 for (uint32_t m = 1; m <= 1; m++) { 8008 GemmMicrokernelTester() 8009 .mr(1) 8010 .nr(16) 8011 .kr(2) 8012 .sr(4) 8013 .m(m) 8014 .n(n) 8015 .k(k) 8016 .iterations(1) 8017 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8018 } 8019 } 8020 } 8021 } 8022 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_gt_16)8023 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16) { 8024 TEST_REQUIRES_ARM_NEON; 8025 for (uint32_t n = 17; n < 32; n++) { 8026 for (size_t k = 1; k <= 80; k += 17) { 8027 GemmMicrokernelTester() 8028 .mr(1) 8029 .nr(16) 8030 .kr(2) 8031 .sr(4) 8032 .m(1) 8033 .n(n) 8034 .k(k) 8035 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8036 } 8037 } 8038 } 8039 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_gt_16_strided_cn)8040 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_strided_cn) { 8041 TEST_REQUIRES_ARM_NEON; 8042 for (uint32_t n = 17; n < 32; n++) { 8043 for (size_t k = 1; k <= 80; k += 17) { 8044 GemmMicrokernelTester() 8045 .mr(1) 8046 .nr(16) 8047 .kr(2) 8048 .sr(4) 8049 .m(1) 8050 .n(n) 8051 .k(k) 8052 .cn_stride(19) 8053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8054 } 8055 } 
8056 } 8057 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_gt_16_strided_a)8058 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_strided_a) { 8059 TEST_REQUIRES_ARM_NEON; 8060 for (uint32_t n = 17; n < 32; n++) { 8061 for (size_t k = 1; k <= 80; k += 17) { 8062 GemmMicrokernelTester() 8063 .mr(1) 8064 .nr(16) 8065 .kr(2) 8066 .sr(4) 8067 .m(1) 8068 .n(n) 8069 .k(k) 8070 .a_stride(83) 8071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8072 } 8073 } 8074 } 8075 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_gt_16_subtile)8076 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_subtile) { 8077 TEST_REQUIRES_ARM_NEON; 8078 for (uint32_t n = 17; n < 32; n++) { 8079 for (size_t k = 1; k <= 80; k += 17) { 8080 for (uint32_t m = 1; m <= 1; m++) { 8081 GemmMicrokernelTester() 8082 .mr(1) 8083 .nr(16) 8084 .kr(2) 8085 .sr(4) 8086 .m(m) 8087 .n(n) 8088 .k(k) 8089 .iterations(1) 8090 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8091 } 8092 } 8093 } 8094 } 8095 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_div_16)8096 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16) { 8097 TEST_REQUIRES_ARM_NEON; 8098 for (uint32_t n = 32; n <= 48; n += 16) { 8099 for (size_t k = 1; k <= 80; k += 17) { 8100 GemmMicrokernelTester() 8101 .mr(1) 8102 .nr(16) 8103 .kr(2) 8104 .sr(4) 8105 .m(1) 8106 .n(n) 8107 .k(k) 8108 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8109 } 8110 } 8111 } 8112 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_div_16_strided_cn)8113 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_strided_cn) { 8114 TEST_REQUIRES_ARM_NEON; 8115 for (uint32_t n = 32; n <= 48; n += 16) { 8116 for (size_t k = 1; k <= 80; k += 17) { 8117 GemmMicrokernelTester() 8118 .mr(1) 8119 .nr(16) 8120 .kr(2) 8121 
.sr(4) 8122 .m(1) 8123 .n(n) 8124 .k(k) 8125 .cn_stride(19) 8126 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8127 } 8128 } 8129 } 8130 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_div_16_strided_a)8131 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_strided_a) { 8132 TEST_REQUIRES_ARM_NEON; 8133 for (uint32_t n = 32; n <= 48; n += 16) { 8134 for (size_t k = 1; k <= 80; k += 17) { 8135 GemmMicrokernelTester() 8136 .mr(1) 8137 .nr(16) 8138 .kr(2) 8139 .sr(4) 8140 .m(1) 8141 .n(n) 8142 .k(k) 8143 .a_stride(83) 8144 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8145 } 8146 } 8147 } 8148 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_div_16_subtile)8149 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_subtile) { 8150 TEST_REQUIRES_ARM_NEON; 8151 for (uint32_t n = 32; n <= 48; n += 16) { 8152 for (size_t k = 1; k <= 80; k += 17) { 8153 for (uint32_t m = 1; m <= 1; m++) { 8154 GemmMicrokernelTester() 8155 .mr(1) 8156 .nr(16) 8157 .kr(2) 8158 .sr(4) 8159 .m(m) 8160 .n(n) 8161 .k(k) 8162 .iterations(1) 8163 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8164 } 8165 } 8166 } 8167 } 8168 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,strided_cm_subtile)8169 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cm_subtile) { 8170 TEST_REQUIRES_ARM_NEON; 8171 for (size_t k = 1; k <= 80; k += 17) { 8172 for (uint32_t n = 1; n <= 16; n++) { 8173 for (uint32_t m = 1; m <= 1; m++) { 8174 GemmMicrokernelTester() 8175 .mr(1) 8176 .nr(16) 8177 .kr(2) 8178 .sr(4) 8179 .m(m) 8180 .n(n) 8181 .k(k) 8182 .cm_stride(19) 8183 .iterations(1) 8184 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8185 } 8186 } 8187 } 8188 } 8189 
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,qmin)8190 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, qmin) { 8191 TEST_REQUIRES_ARM_NEON; 8192 GemmMicrokernelTester() 8193 .mr(1) 8194 .nr(16) 8195 .kr(2) 8196 .sr(4) 8197 .m(1) 8198 .n(16) 8199 .k(16) 8200 .qmin(128) 8201 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8202 } 8203 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,qmax)8204 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, qmax) { 8205 TEST_REQUIRES_ARM_NEON; 8206 GemmMicrokernelTester() 8207 .mr(1) 8208 .nr(16) 8209 .kr(2) 8210 .sr(4) 8211 .m(1) 8212 .n(16) 8213 .k(16) 8214 .qmax(128) 8215 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8216 } 8217 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,strided_cm)8218 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cm) { 8219 TEST_REQUIRES_ARM_NEON; 8220 GemmMicrokernelTester() 8221 .mr(1) 8222 .nr(16) 8223 .kr(2) 8224 .sr(4) 8225 .m(1) 8226 .n(16) 8227 .k(16) 8228 .cm_stride(19) 8229 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8230 } 8231 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 8232 8233 8234 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8)8235 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8) { 8236 TEST_REQUIRES_ARM_NEON; 8237 GemmMicrokernelTester() 8238 .mr(2) 8239 .nr(8) 8240 .kr(4) 8241 .sr(1) 8242 .m(2) 8243 .n(8) 8244 .k(8) 8245 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8246 } 8247 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,strided_cn)8248 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cn) { 8249 TEST_REQUIRES_ARM_NEON; 8250 GemmMicrokernelTester() 8251 .mr(2) 8252 .nr(8) 8253 .kr(4) 
8254 .sr(1) 8255 .m(2) 8256 .n(8) 8257 .k(8) 8258 .cn_stride(11) 8259 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8260 } 8261 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_strided_a)8262 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_strided_a) { 8263 TEST_REQUIRES_ARM_NEON; 8264 GemmMicrokernelTester() 8265 .mr(2) 8266 .nr(8) 8267 .kr(4) 8268 .sr(1) 8269 .m(2) 8270 .n(8) 8271 .k(8) 8272 .a_stride(11) 8273 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8274 } 8275 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_subtile)8276 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile) { 8277 TEST_REQUIRES_ARM_NEON; 8278 for (uint32_t n = 1; n <= 8; n++) { 8279 for (uint32_t m = 1; m <= 2; m++) { 8280 GemmMicrokernelTester() 8281 .mr(2) 8282 .nr(8) 8283 .kr(4) 8284 .sr(1) 8285 .m(m) 8286 .n(n) 8287 .k(8) 8288 .iterations(1) 8289 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8290 } 8291 } 8292 } 8293 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_subtile_m)8294 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) { 8295 TEST_REQUIRES_ARM_NEON; 8296 for (uint32_t m = 1; m <= 2; m++) { 8297 GemmMicrokernelTester() 8298 .mr(2) 8299 .nr(8) 8300 .kr(4) 8301 .sr(1) 8302 .m(m) 8303 .n(8) 8304 .k(8) 8305 .iterations(1) 8306 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8307 } 8308 } 8309 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_subtile_n)8310 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) { 8311 TEST_REQUIRES_ARM_NEON; 8312 for (uint32_t n = 1; n <= 8; n++) { 8313 GemmMicrokernelTester() 8314 .mr(2) 8315 .nr(8) 8316 .kr(4) 8317 .sr(1) 8318 .m(2) 8319 
.n(n) 8320 .k(8) 8321 .iterations(1) 8322 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8323 } 8324 } 8325 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_lt_8)8326 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_lt_8) { 8327 TEST_REQUIRES_ARM_NEON; 8328 for (size_t k = 1; k < 8; k++) { 8329 GemmMicrokernelTester() 8330 .mr(2) 8331 .nr(8) 8332 .kr(4) 8333 .sr(1) 8334 .m(2) 8335 .n(8) 8336 .k(k) 8337 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8338 } 8339 } 8340 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_lt_8_strided_a)8341 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_lt_8_strided_a) { 8342 TEST_REQUIRES_ARM_NEON; 8343 for (size_t k = 1; k < 8; k++) { 8344 GemmMicrokernelTester() 8345 .mr(2) 8346 .nr(8) 8347 .kr(4) 8348 .sr(1) 8349 .m(2) 8350 .n(8) 8351 .k(k) 8352 .a_stride(11) 8353 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8354 } 8355 } 8356 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_lt_8_subtile)8357 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_lt_8_subtile) { 8358 TEST_REQUIRES_ARM_NEON; 8359 for (size_t k = 1; k < 8; k++) { 8360 for (uint32_t n = 1; n <= 8; n++) { 8361 for (uint32_t m = 1; m <= 2; m++) { 8362 GemmMicrokernelTester() 8363 .mr(2) 8364 .nr(8) 8365 .kr(4) 8366 .sr(1) 8367 .m(m) 8368 .n(n) 8369 .k(k) 8370 .iterations(1) 8371 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8372 } 8373 } 8374 } 8375 } 8376 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_gt_8)8377 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_gt_8) { 8378 TEST_REQUIRES_ARM_NEON; 8379 for (size_t k = 9; k < 16; k++) { 8380 GemmMicrokernelTester() 8381 .mr(2) 8382 .nr(8) 8383 .kr(4) 8384 .sr(1) 8385 .m(2) 
8386 .n(8) 8387 .k(k) 8388 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8389 } 8390 } 8391 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_gt_8_strided_a)8392 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_gt_8_strided_a) { 8393 TEST_REQUIRES_ARM_NEON; 8394 for (size_t k = 9; k < 16; k++) { 8395 GemmMicrokernelTester() 8396 .mr(2) 8397 .nr(8) 8398 .kr(4) 8399 .sr(1) 8400 .m(2) 8401 .n(8) 8402 .k(k) 8403 .a_stride(19) 8404 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8405 } 8406 } 8407 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_gt_8_subtile)8408 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_gt_8_subtile) { 8409 TEST_REQUIRES_ARM_NEON; 8410 for (size_t k = 9; k < 16; k++) { 8411 for (uint32_t n = 1; n <= 8; n++) { 8412 for (uint32_t m = 1; m <= 2; m++) { 8413 GemmMicrokernelTester() 8414 .mr(2) 8415 .nr(8) 8416 .kr(4) 8417 .sr(1) 8418 .m(m) 8419 .n(n) 8420 .k(k) 8421 .iterations(1) 8422 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8423 } 8424 } 8425 } 8426 } 8427 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_div_8)8428 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_div_8) { 8429 TEST_REQUIRES_ARM_NEON; 8430 for (size_t k = 16; k <= 80; k += 8) { 8431 GemmMicrokernelTester() 8432 .mr(2) 8433 .nr(8) 8434 .kr(4) 8435 .sr(1) 8436 .m(2) 8437 .n(8) 8438 .k(k) 8439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8440 } 8441 } 8442 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_div_8_strided_a)8443 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_div_8_strided_a) { 8444 TEST_REQUIRES_ARM_NEON; 8445 for (size_t k = 16; k <= 80; k += 8) { 8446 GemmMicrokernelTester() 8447 .mr(2) 8448 .nr(8) 8449 .kr(4) 
8450 .sr(1) 8451 .m(2) 8452 .n(8) 8453 .k(k) 8454 .a_stride(83) 8455 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8456 } 8457 } 8458 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_div_8_subtile)8459 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_div_8_subtile) { 8460 TEST_REQUIRES_ARM_NEON; 8461 for (size_t k = 16; k <= 80; k += 8) { 8462 for (uint32_t n = 1; n <= 8; n++) { 8463 for (uint32_t m = 1; m <= 2; m++) { 8464 GemmMicrokernelTester() 8465 .mr(2) 8466 .nr(8) 8467 .kr(4) 8468 .sr(1) 8469 .m(m) 8470 .n(n) 8471 .k(k) 8472 .iterations(1) 8473 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8474 } 8475 } 8476 } 8477 } 8478 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8)8479 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8) { 8480 TEST_REQUIRES_ARM_NEON; 8481 for (uint32_t n = 9; n < 16; n++) { 8482 for (size_t k = 1; k <= 40; k += 9) { 8483 GemmMicrokernelTester() 8484 .mr(2) 8485 .nr(8) 8486 .kr(4) 8487 .sr(1) 8488 .m(2) 8489 .n(n) 8490 .k(k) 8491 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8492 } 8493 } 8494 } 8495 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8_strided_cn)8496 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) { 8497 TEST_REQUIRES_ARM_NEON; 8498 for (uint32_t n = 9; n < 16; n++) { 8499 for (size_t k = 1; k <= 40; k += 9) { 8500 GemmMicrokernelTester() 8501 .mr(2) 8502 .nr(8) 8503 .kr(4) 8504 .sr(1) 8505 .m(2) 8506 .n(n) 8507 .k(k) 8508 .cn_stride(11) 8509 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8510 } 8511 } 8512 } 8513 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8_strided_a)8514 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, 
n_gt_8_strided_a) { 8515 TEST_REQUIRES_ARM_NEON; 8516 for (uint32_t n = 9; n < 16; n++) { 8517 for (size_t k = 1; k <= 40; k += 9) { 8518 GemmMicrokernelTester() 8519 .mr(2) 8520 .nr(8) 8521 .kr(4) 8522 .sr(1) 8523 .m(2) 8524 .n(n) 8525 .k(k) 8526 .a_stride(43) 8527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8528 } 8529 } 8530 } 8531 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8_subtile)8532 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_subtile) { 8533 TEST_REQUIRES_ARM_NEON; 8534 for (uint32_t n = 9; n < 16; n++) { 8535 for (size_t k = 1; k <= 40; k += 9) { 8536 for (uint32_t m = 1; m <= 2; m++) { 8537 GemmMicrokernelTester() 8538 .mr(2) 8539 .nr(8) 8540 .kr(4) 8541 .sr(1) 8542 .m(m) 8543 .n(n) 8544 .k(k) 8545 .iterations(1) 8546 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8547 } 8548 } 8549 } 8550 } 8551 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_div_8)8552 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8) { 8553 TEST_REQUIRES_ARM_NEON; 8554 for (uint32_t n = 16; n <= 24; n += 8) { 8555 for (size_t k = 1; k <= 40; k += 9) { 8556 GemmMicrokernelTester() 8557 .mr(2) 8558 .nr(8) 8559 .kr(4) 8560 .sr(1) 8561 .m(2) 8562 .n(n) 8563 .k(k) 8564 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8565 } 8566 } 8567 } 8568 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_div_8_strided_cn)8569 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_strided_cn) { 8570 TEST_REQUIRES_ARM_NEON; 8571 for (uint32_t n = 16; n <= 24; n += 8) { 8572 for (size_t k = 1; k <= 40; k += 9) { 8573 GemmMicrokernelTester() 8574 .mr(2) 8575 .nr(8) 8576 .kr(4) 8577 .sr(1) 8578 .m(2) 8579 .n(n) 8580 .k(k) 8581 .cn_stride(11) 8582 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8583 } 8584 } 8585 } 8586 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_div_8_strided_a)8587 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_strided_a) { 8588 TEST_REQUIRES_ARM_NEON; 8589 for (uint32_t n = 16; n <= 24; n += 8) { 8590 for (size_t k = 1; k <= 40; k += 9) { 8591 GemmMicrokernelTester() 8592 .mr(2) 8593 .nr(8) 8594 .kr(4) 8595 .sr(1) 8596 .m(2) 8597 .n(n) 8598 .k(k) 8599 .a_stride(43) 8600 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8601 } 8602 } 8603 } 8604 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_div_8_subtile)8605 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_subtile) { 8606 TEST_REQUIRES_ARM_NEON; 8607 for (uint32_t n = 16; n <= 24; n += 8) { 8608 for (size_t k = 1; k <= 40; k += 9) { 8609 for (uint32_t m = 1; m <= 2; m++) { 8610 GemmMicrokernelTester() 8611 .mr(2) 8612 .nr(8) 8613 .kr(4) 8614 .sr(1) 8615 .m(m) 8616 .n(n) 8617 .k(k) 8618 .iterations(1) 8619 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8620 } 8621 } 8622 } 8623 } 8624 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,strided_cm_subtile)8625 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cm_subtile) { 8626 TEST_REQUIRES_ARM_NEON; 8627 for (size_t k = 1; k <= 40; k += 9) { 8628 for (uint32_t n = 1; n <= 8; n++) { 8629 for (uint32_t m = 1; m <= 2; m++) { 8630 GemmMicrokernelTester() 8631 .mr(2) 8632 .nr(8) 8633 .kr(4) 8634 .sr(1) 8635 .m(m) 8636 .n(n) 8637 .k(k) 8638 .cm_stride(11) 8639 .iterations(1) 8640 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8641 } 8642 } 8643 } 8644 } 8645 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,qmin)8646 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, qmin) { 8647 
TEST_REQUIRES_ARM_NEON; 8648 GemmMicrokernelTester() 8649 .mr(2) 8650 .nr(8) 8651 .kr(4) 8652 .sr(1) 8653 .m(2) 8654 .n(8) 8655 .k(8) 8656 .qmin(128) 8657 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8658 } 8659 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,qmax)8660 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, qmax) { 8661 TEST_REQUIRES_ARM_NEON; 8662 GemmMicrokernelTester() 8663 .mr(2) 8664 .nr(8) 8665 .kr(4) 8666 .sr(1) 8667 .m(2) 8668 .n(8) 8669 .k(8) 8670 .qmax(128) 8671 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8672 } 8673 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,strided_cm)8674 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cm) { 8675 TEST_REQUIRES_ARM_NEON; 8676 GemmMicrokernelTester() 8677 .mr(2) 8678 .nr(8) 8679 .kr(4) 8680 .sr(1) 8681 .m(2) 8682 .n(8) 8683 .k(8) 8684 .cm_stride(11) 8685 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8686 } 8687 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 8688 8689 8690 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_eq_8)8691 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8) { 8692 TEST_REQUIRES_ARM_NEON; 8693 GemmMicrokernelTester() 8694 .mr(3) 8695 .nr(16) 8696 .kr(4) 8697 .sr(1) 8698 .m(3) 8699 .n(16) 8700 .k(8) 8701 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8702 } 8703 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,strided_cn)8704 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cn) { 8705 TEST_REQUIRES_ARM_NEON; 8706 GemmMicrokernelTester() 8707 .mr(3) 8708 .nr(16) 8709 .kr(4) 8710 .sr(1) 8711 .m(3) 8712 .n(16) 8713 .k(8) 8714 .cn_stride(19) 8715 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8716 } 8717 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_eq_8_strided_a)8718 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_strided_a) { 8719 TEST_REQUIRES_ARM_NEON; 8720 GemmMicrokernelTester() 8721 .mr(3) 8722 .nr(16) 8723 .kr(4) 8724 .sr(1) 8725 .m(3) 8726 .n(16) 8727 .k(8) 8728 .a_stride(11) 8729 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8730 } 8731 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_eq_8_subtile)8732 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile) { 8733 TEST_REQUIRES_ARM_NEON; 8734 for (uint32_t n = 1; n <= 16; n++) { 8735 for (uint32_t m = 1; m <= 3; m++) { 8736 GemmMicrokernelTester() 8737 .mr(3) 8738 .nr(16) 8739 .kr(4) 8740 .sr(1) 8741 .m(m) 8742 .n(n) 8743 .k(8) 8744 .iterations(1) 8745 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8746 } 8747 } 8748 } 8749 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_eq_8_subtile_m)8750 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) { 8751 TEST_REQUIRES_ARM_NEON; 8752 for (uint32_t m = 1; m <= 3; m++) { 8753 GemmMicrokernelTester() 8754 .mr(3) 8755 .nr(16) 8756 .kr(4) 8757 .sr(1) 8758 .m(m) 8759 .n(16) 8760 .k(8) 8761 .iterations(1) 8762 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8763 } 8764 } 8765 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_eq_8_subtile_n)8766 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) { 8767 TEST_REQUIRES_ARM_NEON; 8768 for (uint32_t n = 1; n <= 16; n++) { 8769 GemmMicrokernelTester() 8770 .mr(3) 8771 .nr(16) 8772 .kr(4) 8773 .sr(1) 8774 .m(3) 8775 .n(n) 8776 .k(8) 8777 .iterations(1) 8778 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8779 } 8780 } 8781 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_lt_8)8782 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_lt_8) { 8783 TEST_REQUIRES_ARM_NEON; 8784 for (size_t k = 1; k < 8; k++) { 8785 GemmMicrokernelTester() 8786 .mr(3) 8787 .nr(16) 8788 .kr(4) 8789 .sr(1) 8790 .m(3) 8791 .n(16) 8792 .k(k) 8793 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8794 } 8795 } 8796 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_lt_8_strided_a)8797 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_lt_8_strided_a) { 8798 TEST_REQUIRES_ARM_NEON; 8799 for (size_t k = 1; k < 8; k++) { 8800 GemmMicrokernelTester() 8801 .mr(3) 8802 .nr(16) 8803 .kr(4) 8804 .sr(1) 8805 .m(3) 8806 .n(16) 8807 .k(k) 8808 .a_stride(11) 8809 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8810 } 8811 } 8812 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_lt_8_subtile)8813 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_lt_8_subtile) { 8814 TEST_REQUIRES_ARM_NEON; 8815 for (size_t k = 1; k < 8; k++) { 8816 for (uint32_t n = 1; n <= 16; n++) { 8817 for (uint32_t m = 1; m <= 3; m++) { 8818 GemmMicrokernelTester() 8819 .mr(3) 8820 .nr(16) 8821 .kr(4) 8822 .sr(1) 8823 .m(m) 8824 .n(n) 8825 .k(k) 8826 .iterations(1) 8827 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8828 } 8829 } 8830 } 8831 } 8832 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_gt_8)8833 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_gt_8) { 8834 TEST_REQUIRES_ARM_NEON; 8835 for (size_t k = 9; k < 16; k++) { 8836 GemmMicrokernelTester() 8837 .mr(3) 8838 .nr(16) 8839 .kr(4) 8840 .sr(1) 8841 .m(3) 8842 .n(16) 8843 .k(k) 
8844 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8845 } 8846 } 8847 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_gt_8_strided_a)8848 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_gt_8_strided_a) { 8849 TEST_REQUIRES_ARM_NEON; 8850 for (size_t k = 9; k < 16; k++) { 8851 GemmMicrokernelTester() 8852 .mr(3) 8853 .nr(16) 8854 .kr(4) 8855 .sr(1) 8856 .m(3) 8857 .n(16) 8858 .k(k) 8859 .a_stride(19) 8860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8861 } 8862 } 8863 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_gt_8_subtile)8864 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_gt_8_subtile) { 8865 TEST_REQUIRES_ARM_NEON; 8866 for (size_t k = 9; k < 16; k++) { 8867 for (uint32_t n = 1; n <= 16; n++) { 8868 for (uint32_t m = 1; m <= 3; m++) { 8869 GemmMicrokernelTester() 8870 .mr(3) 8871 .nr(16) 8872 .kr(4) 8873 .sr(1) 8874 .m(m) 8875 .n(n) 8876 .k(k) 8877 .iterations(1) 8878 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8879 } 8880 } 8881 } 8882 } 8883 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_div_8)8884 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_div_8) { 8885 TEST_REQUIRES_ARM_NEON; 8886 for (size_t k = 16; k <= 80; k += 8) { 8887 GemmMicrokernelTester() 8888 .mr(3) 8889 .nr(16) 8890 .kr(4) 8891 .sr(1) 8892 .m(3) 8893 .n(16) 8894 .k(k) 8895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8896 } 8897 } 8898 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_div_8_strided_a)8899 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_div_8_strided_a) { 8900 TEST_REQUIRES_ARM_NEON; 8901 for (size_t k = 16; k <= 80; k += 8) { 8902 GemmMicrokernelTester() 8903 .mr(3) 8904 .nr(16) 8905 .kr(4) 8906 
.sr(1) 8907 .m(3) 8908 .n(16) 8909 .k(k) 8910 .a_stride(83) 8911 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8912 } 8913 } 8914 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_div_8_subtile)8915 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_div_8_subtile) { 8916 TEST_REQUIRES_ARM_NEON; 8917 for (size_t k = 16; k <= 80; k += 8) { 8918 for (uint32_t n = 1; n <= 16; n++) { 8919 for (uint32_t m = 1; m <= 3; m++) { 8920 GemmMicrokernelTester() 8921 .mr(3) 8922 .nr(16) 8923 .kr(4) 8924 .sr(1) 8925 .m(m) 8926 .n(n) 8927 .k(k) 8928 .iterations(1) 8929 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8930 } 8931 } 8932 } 8933 } 8934 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_gt_16)8935 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16) { 8936 TEST_REQUIRES_ARM_NEON; 8937 for (uint32_t n = 17; n < 32; n++) { 8938 for (size_t k = 1; k <= 40; k += 9) { 8939 GemmMicrokernelTester() 8940 .mr(3) 8941 .nr(16) 8942 .kr(4) 8943 .sr(1) 8944 .m(3) 8945 .n(n) 8946 .k(k) 8947 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8948 } 8949 } 8950 } 8951 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_gt_16_strided_cn)8952 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) { 8953 TEST_REQUIRES_ARM_NEON; 8954 for (uint32_t n = 17; n < 32; n++) { 8955 for (size_t k = 1; k <= 40; k += 9) { 8956 GemmMicrokernelTester() 8957 .mr(3) 8958 .nr(16) 8959 .kr(4) 8960 .sr(1) 8961 .m(3) 8962 .n(n) 8963 .k(k) 8964 .cn_stride(19) 8965 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8966 } 8967 } 8968 } 8969 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_gt_16_strided_a)8970 
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_strided_a) { 8971 TEST_REQUIRES_ARM_NEON; 8972 for (uint32_t n = 17; n < 32; n++) { 8973 for (size_t k = 1; k <= 40; k += 9) { 8974 GemmMicrokernelTester() 8975 .mr(3) 8976 .nr(16) 8977 .kr(4) 8978 .sr(1) 8979 .m(3) 8980 .n(n) 8981 .k(k) 8982 .a_stride(43) 8983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8984 } 8985 } 8986 } 8987 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_gt_16_subtile)8988 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_subtile) { 8989 TEST_REQUIRES_ARM_NEON; 8990 for (uint32_t n = 17; n < 32; n++) { 8991 for (size_t k = 1; k <= 40; k += 9) { 8992 for (uint32_t m = 1; m <= 3; m++) { 8993 GemmMicrokernelTester() 8994 .mr(3) 8995 .nr(16) 8996 .kr(4) 8997 .sr(1) 8998 .m(m) 8999 .n(n) 9000 .k(k) 9001 .iterations(1) 9002 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9003 } 9004 } 9005 } 9006 } 9007 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_div_16)9008 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16) { 9009 TEST_REQUIRES_ARM_NEON; 9010 for (uint32_t n = 32; n <= 48; n += 16) { 9011 for (size_t k = 1; k <= 40; k += 9) { 9012 GemmMicrokernelTester() 9013 .mr(3) 9014 .nr(16) 9015 .kr(4) 9016 .sr(1) 9017 .m(3) 9018 .n(n) 9019 .k(k) 9020 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9021 } 9022 } 9023 } 9024 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_div_16_strided_cn)9025 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_strided_cn) { 9026 TEST_REQUIRES_ARM_NEON; 9027 for (uint32_t n = 32; n <= 48; n += 16) { 9028 for (size_t k = 1; k <= 40; k += 9) { 9029 GemmMicrokernelTester() 9030 .mr(3) 9031 .nr(16) 9032 .kr(4) 9033 .sr(1) 9034 .m(3) 9035 .n(n) 9036 .k(k) 9037 .cn_stride(19) 9038 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9039 } 9040 } 9041 } 9042 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_div_16_strided_a)9043 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_strided_a) { 9044 TEST_REQUIRES_ARM_NEON; 9045 for (uint32_t n = 32; n <= 48; n += 16) { 9046 for (size_t k = 1; k <= 40; k += 9) { 9047 GemmMicrokernelTester() 9048 .mr(3) 9049 .nr(16) 9050 .kr(4) 9051 .sr(1) 9052 .m(3) 9053 .n(n) 9054 .k(k) 9055 .a_stride(43) 9056 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9057 } 9058 } 9059 } 9060 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_div_16_subtile)9061 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_subtile) { 9062 TEST_REQUIRES_ARM_NEON; 9063 for (uint32_t n = 32; n <= 48; n += 16) { 9064 for (size_t k = 1; k <= 40; k += 9) { 9065 for (uint32_t m = 1; m <= 3; m++) { 9066 GemmMicrokernelTester() 9067 .mr(3) 9068 .nr(16) 9069 .kr(4) 9070 .sr(1) 9071 .m(m) 9072 .n(n) 9073 .k(k) 9074 .iterations(1) 9075 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9076 } 9077 } 9078 } 9079 } 9080 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,strided_cm_subtile)9081 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cm_subtile) { 9082 TEST_REQUIRES_ARM_NEON; 9083 for (size_t k = 1; k <= 40; k += 9) { 9084 for (uint32_t n = 1; n <= 16; n++) { 9085 for (uint32_t m = 1; m <= 3; m++) { 9086 GemmMicrokernelTester() 9087 .mr(3) 9088 .nr(16) 9089 .kr(4) 9090 .sr(1) 9091 .m(m) 9092 .n(n) 9093 .k(k) 9094 .cm_stride(19) 9095 .iterations(1) 9096 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9097 } 9098 } 9099 } 9100 } 9101 
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,qmin)9102 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, qmin) { 9103 TEST_REQUIRES_ARM_NEON; 9104 GemmMicrokernelTester() 9105 .mr(3) 9106 .nr(16) 9107 .kr(4) 9108 .sr(1) 9109 .m(3) 9110 .n(16) 9111 .k(8) 9112 .qmin(128) 9113 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9114 } 9115 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,qmax)9116 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, qmax) { 9117 TEST_REQUIRES_ARM_NEON; 9118 GemmMicrokernelTester() 9119 .mr(3) 9120 .nr(16) 9121 .kr(4) 9122 .sr(1) 9123 .m(3) 9124 .n(16) 9125 .k(8) 9126 .qmax(128) 9127 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9128 } 9129 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,strided_cm)9130 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cm) { 9131 TEST_REQUIRES_ARM_NEON; 9132 GemmMicrokernelTester() 9133 .mr(3) 9134 .nr(16) 9135 .kr(4) 9136 .sr(1) 9137 .m(3) 9138 .n(16) 9139 .k(8) 9140 .cm_stride(19) 9141 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9142 } 9143 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 9144 9145 9146 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_eq_8)9147 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_eq_8) { 9148 TEST_REQUIRES_ARM_NEON; 9149 GemmMicrokernelTester() 9150 .mr(4) 9151 .nr(16) 9152 .kr(4) 9153 .sr(1) 9154 .m(4) 9155 .n(16) 9156 .k(8) 9157 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9158 } 9159 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,strided_cn)9160 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, strided_cn) { 9161 TEST_REQUIRES_ARM_NEON; 9162 GemmMicrokernelTester() 9163 .mr(4) 9164 
.nr(16) 9165 .kr(4) 9166 .sr(1) 9167 .m(4) 9168 .n(16) 9169 .k(8) 9170 .cn_stride(19) 9171 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9172 } 9173 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_eq_8_strided_a)9174 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_eq_8_strided_a) { 9175 TEST_REQUIRES_ARM_NEON; 9176 GemmMicrokernelTester() 9177 .mr(4) 9178 .nr(16) 9179 .kr(4) 9180 .sr(1) 9181 .m(4) 9182 .n(16) 9183 .k(8) 9184 .a_stride(11) 9185 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9186 } 9187 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_eq_8_subtile)9188 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_eq_8_subtile) { 9189 TEST_REQUIRES_ARM_NEON; 9190 for (uint32_t n = 1; n <= 16; n++) { 9191 for (uint32_t m = 1; m <= 4; m++) { 9192 GemmMicrokernelTester() 9193 .mr(4) 9194 .nr(16) 9195 .kr(4) 9196 .sr(1) 9197 .m(m) 9198 .n(n) 9199 .k(8) 9200 .iterations(1) 9201 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9202 } 9203 } 9204 } 9205 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_eq_8_subtile_m)9206 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) { 9207 TEST_REQUIRES_ARM_NEON; 9208 for (uint32_t m = 1; m <= 4; m++) { 9209 GemmMicrokernelTester() 9210 .mr(4) 9211 .nr(16) 9212 .kr(4) 9213 .sr(1) 9214 .m(m) 9215 .n(16) 9216 .k(8) 9217 .iterations(1) 9218 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9219 } 9220 } 9221 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_eq_8_subtile_n)9222 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) { 9223 TEST_REQUIRES_ARM_NEON; 9224 for (uint32_t n = 1; n <= 16; n++) { 9225 GemmMicrokernelTester() 9226 .mr(4) 9227 
.nr(16) 9228 .kr(4) 9229 .sr(1) 9230 .m(4) 9231 .n(n) 9232 .k(8) 9233 .iterations(1) 9234 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9235 } 9236 } 9237 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_lt_8)9238 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_lt_8) { 9239 TEST_REQUIRES_ARM_NEON; 9240 for (size_t k = 1; k < 8; k++) { 9241 GemmMicrokernelTester() 9242 .mr(4) 9243 .nr(16) 9244 .kr(4) 9245 .sr(1) 9246 .m(4) 9247 .n(16) 9248 .k(k) 9249 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9250 } 9251 } 9252 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_lt_8_strided_a)9253 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_lt_8_strided_a) { 9254 TEST_REQUIRES_ARM_NEON; 9255 for (size_t k = 1; k < 8; k++) { 9256 GemmMicrokernelTester() 9257 .mr(4) 9258 .nr(16) 9259 .kr(4) 9260 .sr(1) 9261 .m(4) 9262 .n(16) 9263 .k(k) 9264 .a_stride(11) 9265 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9266 } 9267 } 9268 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_lt_8_subtile)9269 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_lt_8_subtile) { 9270 TEST_REQUIRES_ARM_NEON; 9271 for (size_t k = 1; k < 8; k++) { 9272 for (uint32_t n = 1; n <= 16; n++) { 9273 for (uint32_t m = 1; m <= 4; m++) { 9274 GemmMicrokernelTester() 9275 .mr(4) 9276 .nr(16) 9277 .kr(4) 9278 .sr(1) 9279 .m(m) 9280 .n(n) 9281 .k(k) 9282 .iterations(1) 9283 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9284 } 9285 } 9286 } 9287 } 9288 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_gt_8)9289 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_gt_8) { 9290 TEST_REQUIRES_ARM_NEON; 9291 for (size_t k = 9; k < 16; k++) { 9292 
GemmMicrokernelTester() 9293 .mr(4) 9294 .nr(16) 9295 .kr(4) 9296 .sr(1) 9297 .m(4) 9298 .n(16) 9299 .k(k) 9300 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9301 } 9302 } 9303 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_gt_8_strided_a)9304 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_gt_8_strided_a) { 9305 TEST_REQUIRES_ARM_NEON; 9306 for (size_t k = 9; k < 16; k++) { 9307 GemmMicrokernelTester() 9308 .mr(4) 9309 .nr(16) 9310 .kr(4) 9311 .sr(1) 9312 .m(4) 9313 .n(16) 9314 .k(k) 9315 .a_stride(19) 9316 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9317 } 9318 } 9319 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_gt_8_subtile)9320 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_gt_8_subtile) { 9321 TEST_REQUIRES_ARM_NEON; 9322 for (size_t k = 9; k < 16; k++) { 9323 for (uint32_t n = 1; n <= 16; n++) { 9324 for (uint32_t m = 1; m <= 4; m++) { 9325 GemmMicrokernelTester() 9326 .mr(4) 9327 .nr(16) 9328 .kr(4) 9329 .sr(1) 9330 .m(m) 9331 .n(n) 9332 .k(k) 9333 .iterations(1) 9334 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9335 } 9336 } 9337 } 9338 } 9339 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_div_8)9340 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_div_8) { 9341 TEST_REQUIRES_ARM_NEON; 9342 for (size_t k = 16; k <= 80; k += 8) { 9343 GemmMicrokernelTester() 9344 .mr(4) 9345 .nr(16) 9346 .kr(4) 9347 .sr(1) 9348 .m(4) 9349 .n(16) 9350 .k(k) 9351 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9352 } 9353 } 9354 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_div_8_strided_a)9355 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_div_8_strided_a) { 9356 TEST_REQUIRES_ARM_NEON; 9357 
for (size_t k = 16; k <= 80; k += 8) { 9358 GemmMicrokernelTester() 9359 .mr(4) 9360 .nr(16) 9361 .kr(4) 9362 .sr(1) 9363 .m(4) 9364 .n(16) 9365 .k(k) 9366 .a_stride(83) 9367 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9368 } 9369 } 9370 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_div_8_subtile)9371 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_div_8_subtile) { 9372 TEST_REQUIRES_ARM_NEON; 9373 for (size_t k = 16; k <= 80; k += 8) { 9374 for (uint32_t n = 1; n <= 16; n++) { 9375 for (uint32_t m = 1; m <= 4; m++) { 9376 GemmMicrokernelTester() 9377 .mr(4) 9378 .nr(16) 9379 .kr(4) 9380 .sr(1) 9381 .m(m) 9382 .n(n) 9383 .k(k) 9384 .iterations(1) 9385 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9386 } 9387 } 9388 } 9389 } 9390 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,n_gt_16)9391 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_gt_16) { 9392 TEST_REQUIRES_ARM_NEON; 9393 for (uint32_t n = 17; n < 32; n++) { 9394 for (size_t k = 1; k <= 40; k += 9) { 9395 GemmMicrokernelTester() 9396 .mr(4) 9397 .nr(16) 9398 .kr(4) 9399 .sr(1) 9400 .m(4) 9401 .n(n) 9402 .k(k) 9403 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9404 } 9405 } 9406 } 9407 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,n_gt_16_strided_cn)9408 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) { 9409 TEST_REQUIRES_ARM_NEON; 9410 for (uint32_t n = 17; n < 32; n++) { 9411 for (size_t k = 1; k <= 40; k += 9) { 9412 GemmMicrokernelTester() 9413 .mr(4) 9414 .nr(16) 9415 .kr(4) 9416 .sr(1) 9417 .m(4) 9418 .n(n) 9419 .k(k) 9420 .cn_stride(19) 9421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9422 } 9423 } 9424 } 9425 
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,n_gt_16_strided_a)9426 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_gt_16_strided_a) { 9427 TEST_REQUIRES_ARM_NEON; 9428 for (uint32_t n = 17; n < 32; n++) { 9429 for (size_t k = 1; k <= 40; k += 9) { 9430 GemmMicrokernelTester() 9431 .mr(4) 9432 .nr(16) 9433 .kr(4) 9434 .sr(1) 9435 .m(4) 9436 .n(n) 9437 .k(k) 9438 .a_stride(43) 9439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9440 } 9441 } 9442 } 9443 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,n_gt_16_subtile)9444 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_gt_16_subtile) { 9445 TEST_REQUIRES_ARM_NEON; 9446 for (uint32_t n = 17; n < 32; n++) { 9447 for (size_t k = 1; k <= 40; k += 9) { 9448 for (uint32_t m = 1; m <= 4; m++) { 9449 GemmMicrokernelTester() 9450 .mr(4) 9451 .nr(16) 9452 .kr(4) 9453 .sr(1) 9454 .m(m) 9455 .n(n) 9456 .k(k) 9457 .iterations(1) 9458 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9459 } 9460 } 9461 } 9462 } 9463 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,n_div_16)9464 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_div_16) { 9465 TEST_REQUIRES_ARM_NEON; 9466 for (uint32_t n = 32; n <= 48; n += 16) { 9467 for (size_t k = 1; k <= 40; k += 9) { 9468 GemmMicrokernelTester() 9469 .mr(4) 9470 .nr(16) 9471 .kr(4) 9472 .sr(1) 9473 .m(4) 9474 .n(n) 9475 .k(k) 9476 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9477 } 9478 } 9479 } 9480 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,n_div_16_strided_cn)9481 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_div_16_strided_cn) { 9482 TEST_REQUIRES_ARM_NEON; 9483 for (uint32_t n = 32; n <= 48; n += 16) { 9484 for (size_t k = 1; k <= 40; k += 9) { 9485 GemmMicrokernelTester() 9486 .mr(4) 9487 .nr(16) 9488 
.kr(4) 9489 .sr(1) 9490 .m(4) 9491 .n(n) 9492 .k(k) 9493 .cn_stride(19) 9494 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9495 } 9496 } 9497 } 9498 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,n_div_16_strided_a)9499 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_div_16_strided_a) { 9500 TEST_REQUIRES_ARM_NEON; 9501 for (uint32_t n = 32; n <= 48; n += 16) { 9502 for (size_t k = 1; k <= 40; k += 9) { 9503 GemmMicrokernelTester() 9504 .mr(4) 9505 .nr(16) 9506 .kr(4) 9507 .sr(1) 9508 .m(4) 9509 .n(n) 9510 .k(k) 9511 .a_stride(43) 9512 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9513 } 9514 } 9515 } 9516 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,n_div_16_subtile)9517 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_div_16_subtile) { 9518 TEST_REQUIRES_ARM_NEON; 9519 for (uint32_t n = 32; n <= 48; n += 16) { 9520 for (size_t k = 1; k <= 40; k += 9) { 9521 for (uint32_t m = 1; m <= 4; m++) { 9522 GemmMicrokernelTester() 9523 .mr(4) 9524 .nr(16) 9525 .kr(4) 9526 .sr(1) 9527 .m(m) 9528 .n(n) 9529 .k(k) 9530 .iterations(1) 9531 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9532 } 9533 } 9534 } 9535 } 9536 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,strided_cm_subtile)9537 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, strided_cm_subtile) { 9538 TEST_REQUIRES_ARM_NEON; 9539 for (size_t k = 1; k <= 40; k += 9) { 9540 for (uint32_t n = 1; n <= 16; n++) { 9541 for (uint32_t m = 1; m <= 4; m++) { 9542 GemmMicrokernelTester() 9543 .mr(4) 9544 .nr(16) 9545 .kr(4) 9546 .sr(1) 9547 .m(m) 9548 .n(n) 9549 .k(k) 9550 .cm_stride(19) 9551 .iterations(1) 9552 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9553 } 
9554 } 9555 } 9556 } 9557 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,qmin)9558 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, qmin) { 9559 TEST_REQUIRES_ARM_NEON; 9560 GemmMicrokernelTester() 9561 .mr(4) 9562 .nr(16) 9563 .kr(4) 9564 .sr(1) 9565 .m(4) 9566 .n(16) 9567 .k(8) 9568 .qmin(128) 9569 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9570 } 9571 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,qmax)9572 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, qmax) { 9573 TEST_REQUIRES_ARM_NEON; 9574 GemmMicrokernelTester() 9575 .mr(4) 9576 .nr(16) 9577 .kr(4) 9578 .sr(1) 9579 .m(4) 9580 .n(16) 9581 .k(8) 9582 .qmax(128) 9583 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9584 } 9585 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,strided_cm)9586 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, strided_cm) { 9587 TEST_REQUIRES_ARM_NEON; 9588 GemmMicrokernelTester() 9589 .mr(4) 9590 .nr(16) 9591 .kr(4) 9592 .sr(1) 9593 .m(4) 9594 .n(16) 9595 .k(8) 9596 .cm_stride(19) 9597 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9598 } 9599 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 9600 9601 9602 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_eq_16)9603 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16) { 9604 TEST_REQUIRES_ARM_NEON; 9605 GemmMicrokernelTester() 9606 .mr(4) 9607 .nr(8) 9608 .kr(4) 9609 .sr(1) 9610 .m(4) 9611 .n(8) 9612 .k(16) 9613 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9614 } 9615 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,strided_cn)9616 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cn) { 9617 TEST_REQUIRES_ARM_NEON; 9618 
GemmMicrokernelTester() 9619 .mr(4) 9620 .nr(8) 9621 .kr(4) 9622 .sr(1) 9623 .m(4) 9624 .n(8) 9625 .k(16) 9626 .cn_stride(11) 9627 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9628 } 9629 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_eq_16_strided_a)9630 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_strided_a) { 9631 TEST_REQUIRES_ARM_NEON; 9632 GemmMicrokernelTester() 9633 .mr(4) 9634 .nr(8) 9635 .kr(4) 9636 .sr(1) 9637 .m(4) 9638 .n(8) 9639 .k(16) 9640 .a_stride(19) 9641 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9642 } 9643 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_eq_16_subtile)9644 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile) { 9645 TEST_REQUIRES_ARM_NEON; 9646 for (uint32_t n = 1; n <= 8; n++) { 9647 for (uint32_t m = 1; m <= 4; m++) { 9648 GemmMicrokernelTester() 9649 .mr(4) 9650 .nr(8) 9651 .kr(4) 9652 .sr(1) 9653 .m(m) 9654 .n(n) 9655 .k(16) 9656 .iterations(1) 9657 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9658 } 9659 } 9660 } 9661 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_eq_16_subtile_m)9662 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) { 9663 TEST_REQUIRES_ARM_NEON; 9664 for (uint32_t m = 1; m <= 4; m++) { 9665 GemmMicrokernelTester() 9666 .mr(4) 9667 .nr(8) 9668 .kr(4) 9669 .sr(1) 9670 .m(m) 9671 .n(8) 9672 .k(16) 9673 .iterations(1) 9674 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9675 } 9676 } 9677 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_eq_16_subtile_n)9678 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) { 9679 TEST_REQUIRES_ARM_NEON; 9680 for (uint32_t n = 1; n <= 8; n++) { 9681 
GemmMicrokernelTester() 9682 .mr(4) 9683 .nr(8) 9684 .kr(4) 9685 .sr(1) 9686 .m(4) 9687 .n(n) 9688 .k(16) 9689 .iterations(1) 9690 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9691 } 9692 } 9693 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_lt_16)9694 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_lt_16) { 9695 TEST_REQUIRES_ARM_NEON; 9696 for (size_t k = 1; k < 16; k++) { 9697 GemmMicrokernelTester() 9698 .mr(4) 9699 .nr(8) 9700 .kr(4) 9701 .sr(1) 9702 .m(4) 9703 .n(8) 9704 .k(k) 9705 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9706 } 9707 } 9708 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_lt_16_strided_a)9709 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_lt_16_strided_a) { 9710 TEST_REQUIRES_ARM_NEON; 9711 for (size_t k = 1; k < 16; k++) { 9712 GemmMicrokernelTester() 9713 .mr(4) 9714 .nr(8) 9715 .kr(4) 9716 .sr(1) 9717 .m(4) 9718 .n(8) 9719 .k(k) 9720 .a_stride(19) 9721 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9722 } 9723 } 9724 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_lt_16_subtile)9725 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_lt_16_subtile) { 9726 TEST_REQUIRES_ARM_NEON; 9727 for (size_t k = 1; k < 16; k++) { 9728 for (uint32_t n = 1; n <= 8; n++) { 9729 for (uint32_t m = 1; m <= 4; m++) { 9730 GemmMicrokernelTester() 9731 .mr(4) 9732 .nr(8) 9733 .kr(4) 9734 .sr(1) 9735 .m(m) 9736 .n(n) 9737 .k(k) 9738 .iterations(1) 9739 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9740 } 9741 } 9742 } 9743 } 9744 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_gt_16)9745 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_gt_16) { 9746 TEST_REQUIRES_ARM_NEON; 9747 for (size_t k = 17; k 
< 32; k++) { 9748 GemmMicrokernelTester() 9749 .mr(4) 9750 .nr(8) 9751 .kr(4) 9752 .sr(1) 9753 .m(4) 9754 .n(8) 9755 .k(k) 9756 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9757 } 9758 } 9759 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_gt_16_strided_a)9760 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_gt_16_strided_a) { 9761 TEST_REQUIRES_ARM_NEON; 9762 for (size_t k = 17; k < 32; k++) { 9763 GemmMicrokernelTester() 9764 .mr(4) 9765 .nr(8) 9766 .kr(4) 9767 .sr(1) 9768 .m(4) 9769 .n(8) 9770 .k(k) 9771 .a_stride(37) 9772 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9773 } 9774 } 9775 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_gt_16_subtile)9776 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_gt_16_subtile) { 9777 TEST_REQUIRES_ARM_NEON; 9778 for (size_t k = 17; k < 32; k++) { 9779 for (uint32_t n = 1; n <= 8; n++) { 9780 for (uint32_t m = 1; m <= 4; m++) { 9781 GemmMicrokernelTester() 9782 .mr(4) 9783 .nr(8) 9784 .kr(4) 9785 .sr(1) 9786 .m(m) 9787 .n(n) 9788 .k(k) 9789 .iterations(1) 9790 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9791 } 9792 } 9793 } 9794 } 9795 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_div_16)9796 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_div_16) { 9797 TEST_REQUIRES_ARM_NEON; 9798 for (size_t k = 32; k <= 160; k += 16) { 9799 GemmMicrokernelTester() 9800 .mr(4) 9801 .nr(8) 9802 .kr(4) 9803 .sr(1) 9804 .m(4) 9805 .n(8) 9806 .k(k) 9807 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9808 } 9809 } 9810 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_div_16_strided_a)9811 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_div_16_strided_a) { 9812 
TEST_REQUIRES_ARM_NEON; 9813 for (size_t k = 32; k <= 160; k += 16) { 9814 GemmMicrokernelTester() 9815 .mr(4) 9816 .nr(8) 9817 .kr(4) 9818 .sr(1) 9819 .m(4) 9820 .n(8) 9821 .k(k) 9822 .a_stride(163) 9823 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9824 } 9825 } 9826 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_div_16_subtile)9827 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_div_16_subtile) { 9828 TEST_REQUIRES_ARM_NEON; 9829 for (size_t k = 32; k <= 160; k += 16) { 9830 for (uint32_t n = 1; n <= 8; n++) { 9831 for (uint32_t m = 1; m <= 4; m++) { 9832 GemmMicrokernelTester() 9833 .mr(4) 9834 .nr(8) 9835 .kr(4) 9836 .sr(1) 9837 .m(m) 9838 .n(n) 9839 .k(k) 9840 .iterations(1) 9841 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9842 } 9843 } 9844 } 9845 } 9846 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,n_gt_8)9847 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8) { 9848 TEST_REQUIRES_ARM_NEON; 9849 for (uint32_t n = 9; n < 16; n++) { 9850 for (size_t k = 1; k <= 80; k += 17) { 9851 GemmMicrokernelTester() 9852 .mr(4) 9853 .nr(8) 9854 .kr(4) 9855 .sr(1) 9856 .m(4) 9857 .n(n) 9858 .k(k) 9859 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9860 } 9861 } 9862 } 9863 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,n_gt_8_strided_cn)9864 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) { 9865 TEST_REQUIRES_ARM_NEON; 9866 for (uint32_t n = 9; n < 16; n++) { 9867 for (size_t k = 1; k <= 80; k += 17) { 9868 GemmMicrokernelTester() 9869 .mr(4) 9870 .nr(8) 9871 .kr(4) 9872 .sr(1) 9873 .m(4) 9874 .n(n) 9875 .k(k) 9876 .cn_stride(11) 9877 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9878 } 9879 } 
9880 } 9881 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,n_gt_8_strided_a)9882 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_strided_a) { 9883 TEST_REQUIRES_ARM_NEON; 9884 for (uint32_t n = 9; n < 16; n++) { 9885 for (size_t k = 1; k <= 80; k += 17) { 9886 GemmMicrokernelTester() 9887 .mr(4) 9888 .nr(8) 9889 .kr(4) 9890 .sr(1) 9891 .m(4) 9892 .n(n) 9893 .k(k) 9894 .a_stride(83) 9895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9896 } 9897 } 9898 } 9899 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,n_gt_8_subtile)9900 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_subtile) { 9901 TEST_REQUIRES_ARM_NEON; 9902 for (uint32_t n = 9; n < 16; n++) { 9903 for (size_t k = 1; k <= 80; k += 17) { 9904 for (uint32_t m = 1; m <= 4; m++) { 9905 GemmMicrokernelTester() 9906 .mr(4) 9907 .nr(8) 9908 .kr(4) 9909 .sr(1) 9910 .m(m) 9911 .n(n) 9912 .k(k) 9913 .iterations(1) 9914 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9915 } 9916 } 9917 } 9918 } 9919 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,n_div_8)9920 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8) { 9921 TEST_REQUIRES_ARM_NEON; 9922 for (uint32_t n = 16; n <= 24; n += 8) { 9923 for (size_t k = 1; k <= 80; k += 17) { 9924 GemmMicrokernelTester() 9925 .mr(4) 9926 .nr(8) 9927 .kr(4) 9928 .sr(1) 9929 .m(4) 9930 .n(n) 9931 .k(k) 9932 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9933 } 9934 } 9935 } 9936 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,n_div_8_strided_cn)9937 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) { 9938 TEST_REQUIRES_ARM_NEON; 9939 for (uint32_t n = 16; n <= 24; n += 8) { 9940 for (size_t k = 1; k <= 80; k += 17) { 9941 GemmMicrokernelTester() 9942 .mr(4) 9943 .nr(8) 9944 .kr(4) 9945 
.sr(1) 9946 .m(4) 9947 .n(n) 9948 .k(k) 9949 .cn_stride(11) 9950 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9951 } 9952 } 9953 } 9954 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,n_div_8_strided_a)9955 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_strided_a) { 9956 TEST_REQUIRES_ARM_NEON; 9957 for (uint32_t n = 16; n <= 24; n += 8) { 9958 for (size_t k = 1; k <= 80; k += 17) { 9959 GemmMicrokernelTester() 9960 .mr(4) 9961 .nr(8) 9962 .kr(4) 9963 .sr(1) 9964 .m(4) 9965 .n(n) 9966 .k(k) 9967 .a_stride(83) 9968 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9969 } 9970 } 9971 } 9972 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,n_div_8_subtile)9973 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_subtile) { 9974 TEST_REQUIRES_ARM_NEON; 9975 for (uint32_t n = 16; n <= 24; n += 8) { 9976 for (size_t k = 1; k <= 80; k += 17) { 9977 for (uint32_t m = 1; m <= 4; m++) { 9978 GemmMicrokernelTester() 9979 .mr(4) 9980 .nr(8) 9981 .kr(4) 9982 .sr(1) 9983 .m(m) 9984 .n(n) 9985 .k(k) 9986 .iterations(1) 9987 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9988 } 9989 } 9990 } 9991 } 9992 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,strided_cm_subtile)9993 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cm_subtile) { 9994 TEST_REQUIRES_ARM_NEON; 9995 for (size_t k = 1; k <= 80; k += 17) { 9996 for (uint32_t n = 1; n <= 8; n++) { 9997 for (uint32_t m = 1; m <= 4; m++) { 9998 GemmMicrokernelTester() 9999 .mr(4) 10000 .nr(8) 10001 .kr(4) 10002 .sr(1) 10003 .m(m) 10004 .n(n) 10005 .k(k) 10006 .cm_stride(11) 10007 .iterations(1) 10008 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10009 } 10010 } 10011 } 10012 
} 10013 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,qmin)10014 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, qmin) { 10015 TEST_REQUIRES_ARM_NEON; 10016 GemmMicrokernelTester() 10017 .mr(4) 10018 .nr(8) 10019 .kr(4) 10020 .sr(1) 10021 .m(4) 10022 .n(8) 10023 .k(16) 10024 .qmin(128) 10025 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10026 } 10027 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,qmax)10028 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, qmax) { 10029 TEST_REQUIRES_ARM_NEON; 10030 GemmMicrokernelTester() 10031 .mr(4) 10032 .nr(8) 10033 .kr(4) 10034 .sr(1) 10035 .m(4) 10036 .n(8) 10037 .k(16) 10038 .qmax(128) 10039 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10040 } 10041 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,strided_cm)10042 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cm) { 10043 TEST_REQUIRES_ARM_NEON; 10044 GemmMicrokernelTester() 10045 .mr(4) 10046 .nr(8) 10047 .kr(4) 10048 .sr(1) 10049 .m(4) 10050 .n(8) 10051 .k(16) 10052 .cm_stride(11) 10053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10054 } 10055 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 10056 10057 10058 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16)10059 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16) { 10060 TEST_REQUIRES_ARM_NEON; 10061 GemmMicrokernelTester() 10062 .mr(1) 10063 .nr(16) 10064 .kr(4) 10065 .sr(1) 10066 .m(1) 10067 .n(16) 10068 .k(16) 10069 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10070 } 10071 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,strided_cn)10072 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, strided_cn) { 10073 
TEST_REQUIRES_ARM_NEON; 10074 GemmMicrokernelTester() 10075 .mr(1) 10076 .nr(16) 10077 .kr(4) 10078 .sr(1) 10079 .m(1) 10080 .n(16) 10081 .k(16) 10082 .cn_stride(19) 10083 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10084 } 10085 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16_strided_a)10086 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16_strided_a) { 10087 TEST_REQUIRES_ARM_NEON; 10088 GemmMicrokernelTester() 10089 .mr(1) 10090 .nr(16) 10091 .kr(4) 10092 .sr(1) 10093 .m(1) 10094 .n(16) 10095 .k(16) 10096 .a_stride(19) 10097 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10098 } 10099 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16_subtile)10100 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16_subtile) { 10101 TEST_REQUIRES_ARM_NEON; 10102 for (uint32_t n = 1; n <= 16; n++) { 10103 for (uint32_t m = 1; m <= 1; m++) { 10104 GemmMicrokernelTester() 10105 .mr(1) 10106 .nr(16) 10107 .kr(4) 10108 .sr(1) 10109 .m(m) 10110 .n(n) 10111 .k(16) 10112 .iterations(1) 10113 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10114 } 10115 } 10116 } 10117 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16_subtile_m)10118 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) { 10119 TEST_REQUIRES_ARM_NEON; 10120 for (uint32_t m = 1; m <= 1; m++) { 10121 GemmMicrokernelTester() 10122 .mr(1) 10123 .nr(16) 10124 .kr(4) 10125 .sr(1) 10126 .m(m) 10127 .n(16) 10128 .k(16) 10129 .iterations(1) 10130 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10131 } 10132 } 10133 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16_subtile_n)10134 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, 
k_eq_16_subtile_n) { 10135 TEST_REQUIRES_ARM_NEON; 10136 for (uint32_t n = 1; n <= 16; n++) { 10137 GemmMicrokernelTester() 10138 .mr(1) 10139 .nr(16) 10140 .kr(4) 10141 .sr(1) 10142 .m(1) 10143 .n(n) 10144 .k(16) 10145 .iterations(1) 10146 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10147 } 10148 } 10149 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_lt_16)10150 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_lt_16) { 10151 TEST_REQUIRES_ARM_NEON; 10152 for (size_t k = 1; k < 16; k++) { 10153 GemmMicrokernelTester() 10154 .mr(1) 10155 .nr(16) 10156 .kr(4) 10157 .sr(1) 10158 .m(1) 10159 .n(16) 10160 .k(k) 10161 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10162 } 10163 } 10164 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_lt_16_strided_a)10165 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_lt_16_strided_a) { 10166 TEST_REQUIRES_ARM_NEON; 10167 for (size_t k = 1; k < 16; k++) { 10168 GemmMicrokernelTester() 10169 .mr(1) 10170 .nr(16) 10171 .kr(4) 10172 .sr(1) 10173 .m(1) 10174 .n(16) 10175 .k(k) 10176 .a_stride(19) 10177 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10178 } 10179 } 10180 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_lt_16_subtile)10181 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_lt_16_subtile) { 10182 TEST_REQUIRES_ARM_NEON; 10183 for (size_t k = 1; k < 16; k++) { 10184 for (uint32_t n = 1; n <= 16; n++) { 10185 for (uint32_t m = 1; m <= 1; m++) { 10186 GemmMicrokernelTester() 10187 .mr(1) 10188 .nr(16) 10189 .kr(4) 10190 .sr(1) 10191 .m(m) 10192 .n(n) 10193 .k(k) 10194 .iterations(1) 10195 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10196 } 10197 } 10198 } 10199 } 
10200 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_gt_16)10201 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_gt_16) { 10202 TEST_REQUIRES_ARM_NEON; 10203 for (size_t k = 17; k < 32; k++) { 10204 GemmMicrokernelTester() 10205 .mr(1) 10206 .nr(16) 10207 .kr(4) 10208 .sr(1) 10209 .m(1) 10210 .n(16) 10211 .k(k) 10212 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10213 } 10214 } 10215 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_gt_16_strided_a)10216 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_gt_16_strided_a) { 10217 TEST_REQUIRES_ARM_NEON; 10218 for (size_t k = 17; k < 32; k++) { 10219 GemmMicrokernelTester() 10220 .mr(1) 10221 .nr(16) 10222 .kr(4) 10223 .sr(1) 10224 .m(1) 10225 .n(16) 10226 .k(k) 10227 .a_stride(37) 10228 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10229 } 10230 } 10231 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_gt_16_subtile)10232 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_gt_16_subtile) { 10233 TEST_REQUIRES_ARM_NEON; 10234 for (size_t k = 17; k < 32; k++) { 10235 for (uint32_t n = 1; n <= 16; n++) { 10236 for (uint32_t m = 1; m <= 1; m++) { 10237 GemmMicrokernelTester() 10238 .mr(1) 10239 .nr(16) 10240 .kr(4) 10241 .sr(1) 10242 .m(m) 10243 .n(n) 10244 .k(k) 10245 .iterations(1) 10246 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10247 } 10248 } 10249 } 10250 } 10251 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_div_16)10252 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_div_16) { 10253 TEST_REQUIRES_ARM_NEON; 10254 for (size_t k = 32; k <= 160; k += 16) { 10255 GemmMicrokernelTester() 10256 .mr(1) 10257 .nr(16) 10258 .kr(4) 10259 .sr(1) 10260 .m(1) 10261 .n(16) 10262 .k(k) 10263 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10264 } 10265 } 10266 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_div_16_strided_a)10267 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_div_16_strided_a) { 10268 TEST_REQUIRES_ARM_NEON; 10269 for (size_t k = 32; k <= 160; k += 16) { 10270 GemmMicrokernelTester() 10271 .mr(1) 10272 .nr(16) 10273 .kr(4) 10274 .sr(1) 10275 .m(1) 10276 .n(16) 10277 .k(k) 10278 .a_stride(163) 10279 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10280 } 10281 } 10282 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_div_16_subtile)10283 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_div_16_subtile) { 10284 TEST_REQUIRES_ARM_NEON; 10285 for (size_t k = 32; k <= 160; k += 16) { 10286 for (uint32_t n = 1; n <= 16; n++) { 10287 for (uint32_t m = 1; m <= 1; m++) { 10288 GemmMicrokernelTester() 10289 .mr(1) 10290 .nr(16) 10291 .kr(4) 10292 .sr(1) 10293 .m(m) 10294 .n(n) 10295 .k(k) 10296 .iterations(1) 10297 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10298 } 10299 } 10300 } 10301 } 10302 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_gt_16)10303 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16) { 10304 TEST_REQUIRES_ARM_NEON; 10305 for (uint32_t n = 17; n < 32; n++) { 10306 for (size_t k = 1; k <= 80; k += 17) { 10307 GemmMicrokernelTester() 10308 .mr(1) 10309 .nr(16) 10310 .kr(4) 10311 .sr(1) 10312 .m(1) 10313 .n(n) 10314 .k(k) 10315 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10316 } 10317 } 10318 } 10319 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_gt_16_strided_cn)10320 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) { 10321 
TEST_REQUIRES_ARM_NEON; 10322 for (uint32_t n = 17; n < 32; n++) { 10323 for (size_t k = 1; k <= 80; k += 17) { 10324 GemmMicrokernelTester() 10325 .mr(1) 10326 .nr(16) 10327 .kr(4) 10328 .sr(1) 10329 .m(1) 10330 .n(n) 10331 .k(k) 10332 .cn_stride(19) 10333 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10334 } 10335 } 10336 } 10337 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_gt_16_strided_a)10338 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16_strided_a) { 10339 TEST_REQUIRES_ARM_NEON; 10340 for (uint32_t n = 17; n < 32; n++) { 10341 for (size_t k = 1; k <= 80; k += 17) { 10342 GemmMicrokernelTester() 10343 .mr(1) 10344 .nr(16) 10345 .kr(4) 10346 .sr(1) 10347 .m(1) 10348 .n(n) 10349 .k(k) 10350 .a_stride(83) 10351 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10352 } 10353 } 10354 } 10355 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_gt_16_subtile)10356 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16_subtile) { 10357 TEST_REQUIRES_ARM_NEON; 10358 for (uint32_t n = 17; n < 32; n++) { 10359 for (size_t k = 1; k <= 80; k += 17) { 10360 for (uint32_t m = 1; m <= 1; m++) { 10361 GemmMicrokernelTester() 10362 .mr(1) 10363 .nr(16) 10364 .kr(4) 10365 .sr(1) 10366 .m(m) 10367 .n(n) 10368 .k(k) 10369 .iterations(1) 10370 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10371 } 10372 } 10373 } 10374 } 10375 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16)10376 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16) { 10377 TEST_REQUIRES_ARM_NEON; 10378 for (uint32_t n = 32; n <= 48; n += 16) { 10379 for (size_t k = 1; k <= 80; k += 17) { 10380 GemmMicrokernelTester() 10381 .mr(1) 10382 .nr(16) 10383 .kr(4) 10384 .sr(1) 10385 .m(1) 10386 .n(n) 10387 .k(k) 10388 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10389 } 10390 } 10391 } 10392 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16_strided_cn)10393 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) { 10394 TEST_REQUIRES_ARM_NEON; 10395 for (uint32_t n = 32; n <= 48; n += 16) { 10396 for (size_t k = 1; k <= 80; k += 17) { 10397 GemmMicrokernelTester() 10398 .mr(1) 10399 .nr(16) 10400 .kr(4) 10401 .sr(1) 10402 .m(1) 10403 .n(n) 10404 .k(k) 10405 .cn_stride(19) 10406 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10407 } 10408 } 10409 } 10410 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16_strided_a)10411 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16_strided_a) { 10412 TEST_REQUIRES_ARM_NEON; 10413 for (uint32_t n = 32; n <= 48; n += 16) { 10414 for (size_t k = 1; k <= 80; k += 17) { 10415 GemmMicrokernelTester() 10416 .mr(1) 10417 .nr(16) 10418 .kr(4) 10419 .sr(1) 10420 .m(1) 10421 .n(n) 10422 .k(k) 10423 .a_stride(83) 10424 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10425 } 10426 } 10427 } 10428 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16_subtile)10429 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16_subtile) { 10430 TEST_REQUIRES_ARM_NEON; 10431 for (uint32_t n = 32; n <= 48; n += 16) { 10432 for (size_t k = 1; k <= 80; k += 17) { 10433 for (uint32_t m = 1; m <= 1; m++) { 10434 GemmMicrokernelTester() 10435 .mr(1) 10436 .nr(16) 10437 .kr(4) 10438 .sr(1) 10439 .m(m) 10440 .n(n) 10441 .k(k) 10442 .iterations(1) 10443 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10444 } 10445 } 10446 } 10447 } 10448 
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,strided_cm_subtile)10449 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, strided_cm_subtile) { 10450 TEST_REQUIRES_ARM_NEON; 10451 for (size_t k = 1; k <= 80; k += 17) { 10452 for (uint32_t n = 1; n <= 16; n++) { 10453 for (uint32_t m = 1; m <= 1; m++) { 10454 GemmMicrokernelTester() 10455 .mr(1) 10456 .nr(16) 10457 .kr(4) 10458 .sr(1) 10459 .m(m) 10460 .n(n) 10461 .k(k) 10462 .cm_stride(19) 10463 .iterations(1) 10464 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10465 } 10466 } 10467 } 10468 } 10469 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,qmin)10470 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, qmin) { 10471 TEST_REQUIRES_ARM_NEON; 10472 GemmMicrokernelTester() 10473 .mr(1) 10474 .nr(16) 10475 .kr(4) 10476 .sr(1) 10477 .m(1) 10478 .n(16) 10479 .k(16) 10480 .qmin(128) 10481 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10482 } 10483 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,qmax)10484 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, qmax) { 10485 TEST_REQUIRES_ARM_NEON; 10486 GemmMicrokernelTester() 10487 .mr(1) 10488 .nr(16) 10489 .kr(4) 10490 .sr(1) 10491 .m(1) 10492 .n(16) 10493 .k(16) 10494 .qmax(128) 10495 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10496 } 10497 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,strided_cm)10498 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, strided_cm) { 10499 TEST_REQUIRES_ARM_NEON; 10500 GemmMicrokernelTester() 10501 .mr(1) 10502 .nr(16) 10503 .kr(4) 10504 .sr(1) 10505 .m(1) 10506 .n(16) 10507 .k(16) 10508 .cm_stride(19) 10509 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10510 } 10511 #endif // 
XNN_ARCH_ARM || XNN_ARCH_ARM64 10512 10513 10514 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_eq_8)10515 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8) { 10516 TEST_REQUIRES_ARM_NEON; 10517 GemmMicrokernelTester() 10518 .mr(4) 10519 .nr(16) 10520 .kr(4) 10521 .sr(1) 10522 .m(4) 10523 .n(16) 10524 .k(8) 10525 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10526 } 10527 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,strided_cn)10528 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cn) { 10529 TEST_REQUIRES_ARM_NEON; 10530 GemmMicrokernelTester() 10531 .mr(4) 10532 .nr(16) 10533 .kr(4) 10534 .sr(1) 10535 .m(4) 10536 .n(16) 10537 .k(8) 10538 .cn_stride(19) 10539 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10540 } 10541 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_eq_8_strided_a)10542 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_strided_a) { 10543 TEST_REQUIRES_ARM_NEON; 10544 GemmMicrokernelTester() 10545 .mr(4) 10546 .nr(16) 10547 .kr(4) 10548 .sr(1) 10549 .m(4) 10550 .n(16) 10551 .k(8) 10552 .a_stride(11) 10553 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10554 } 10555 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_eq_8_subtile)10556 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile) { 10557 TEST_REQUIRES_ARM_NEON; 10558 for (uint32_t n = 1; n <= 16; n++) { 10559 for (uint32_t m = 1; m <= 4; m++) { 10560 GemmMicrokernelTester() 10561 .mr(4) 10562 .nr(16) 10563 .kr(4) 10564 .sr(1) 10565 .m(m) 10566 .n(n) 10567 .k(8) 10568 .iterations(1) 10569 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10570 } 10571 } 10572 } 
10573 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_eq_8_subtile_m)10574 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile_m) { 10575 TEST_REQUIRES_ARM_NEON; 10576 for (uint32_t m = 1; m <= 4; m++) { 10577 GemmMicrokernelTester() 10578 .mr(4) 10579 .nr(16) 10580 .kr(4) 10581 .sr(1) 10582 .m(m) 10583 .n(16) 10584 .k(8) 10585 .iterations(1) 10586 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10587 } 10588 } 10589 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_eq_8_subtile_n)10590 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile_n) { 10591 TEST_REQUIRES_ARM_NEON; 10592 for (uint32_t n = 1; n <= 16; n++) { 10593 GemmMicrokernelTester() 10594 .mr(4) 10595 .nr(16) 10596 .kr(4) 10597 .sr(1) 10598 .m(4) 10599 .n(n) 10600 .k(8) 10601 .iterations(1) 10602 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10603 } 10604 } 10605 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_lt_8)10606 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_lt_8) { 10607 TEST_REQUIRES_ARM_NEON; 10608 for (size_t k = 1; k < 8; k++) { 10609 GemmMicrokernelTester() 10610 .mr(4) 10611 .nr(16) 10612 .kr(4) 10613 .sr(1) 10614 .m(4) 10615 .n(16) 10616 .k(k) 10617 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10618 } 10619 } 10620 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_lt_8_strided_a)10621 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_lt_8_strided_a) { 10622 TEST_REQUIRES_ARM_NEON; 10623 for (size_t k = 1; k < 8; k++) { 10624 GemmMicrokernelTester() 10625 .mr(4) 10626 .nr(16) 10627 .kr(4) 10628 .sr(1) 10629 .m(4) 10630 .n(16) 10631 .k(k) 10632 .a_stride(11) 10633 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 10634 } 10635 } 10636 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_lt_8_subtile)10637 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_lt_8_subtile) { 10638 TEST_REQUIRES_ARM_NEON; 10639 for (size_t k = 1; k < 8; k++) { 10640 for (uint32_t n = 1; n <= 16; n++) { 10641 for (uint32_t m = 1; m <= 4; m++) { 10642 GemmMicrokernelTester() 10643 .mr(4) 10644 .nr(16) 10645 .kr(4) 10646 .sr(1) 10647 .m(m) 10648 .n(n) 10649 .k(k) 10650 .iterations(1) 10651 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10652 } 10653 } 10654 } 10655 } 10656 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_gt_8)10657 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_gt_8) { 10658 TEST_REQUIRES_ARM_NEON; 10659 for (size_t k = 9; k < 16; k++) { 10660 GemmMicrokernelTester() 10661 .mr(4) 10662 .nr(16) 10663 .kr(4) 10664 .sr(1) 10665 .m(4) 10666 .n(16) 10667 .k(k) 10668 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10669 } 10670 } 10671 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_gt_8_strided_a)10672 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_gt_8_strided_a) { 10673 TEST_REQUIRES_ARM_NEON; 10674 for (size_t k = 9; k < 16; k++) { 10675 GemmMicrokernelTester() 10676 .mr(4) 10677 .nr(16) 10678 .kr(4) 10679 .sr(1) 10680 .m(4) 10681 .n(16) 10682 .k(k) 10683 .a_stride(19) 10684 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10685 } 10686 } 10687 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_gt_8_subtile)10688 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_gt_8_subtile) { 10689 TEST_REQUIRES_ARM_NEON; 10690 for (size_t k = 9; k < 16; k++) { 10691 for (uint32_t n = 1; n <= 16; n++) { 10692 for (uint32_t m = 1; m <= 4; m++) { 10693 GemmMicrokernelTester() 10694 .mr(4) 10695 
.nr(16) 10696 .kr(4) 10697 .sr(1) 10698 .m(m) 10699 .n(n) 10700 .k(k) 10701 .iterations(1) 10702 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10703 } 10704 } 10705 } 10706 } 10707 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_div_8)10708 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_div_8) { 10709 TEST_REQUIRES_ARM_NEON; 10710 for (size_t k = 16; k <= 80; k += 8) { 10711 GemmMicrokernelTester() 10712 .mr(4) 10713 .nr(16) 10714 .kr(4) 10715 .sr(1) 10716 .m(4) 10717 .n(16) 10718 .k(k) 10719 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10720 } 10721 } 10722 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_div_8_strided_a)10723 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_div_8_strided_a) { 10724 TEST_REQUIRES_ARM_NEON; 10725 for (size_t k = 16; k <= 80; k += 8) { 10726 GemmMicrokernelTester() 10727 .mr(4) 10728 .nr(16) 10729 .kr(4) 10730 .sr(1) 10731 .m(4) 10732 .n(16) 10733 .k(k) 10734 .a_stride(83) 10735 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10736 } 10737 } 10738 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_div_8_subtile)10739 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_div_8_subtile) { 10740 TEST_REQUIRES_ARM_NEON; 10741 for (size_t k = 16; k <= 80; k += 8) { 10742 for (uint32_t n = 1; n <= 16; n++) { 10743 for (uint32_t m = 1; m <= 4; m++) { 10744 GemmMicrokernelTester() 10745 .mr(4) 10746 .nr(16) 10747 .kr(4) 10748 .sr(1) 10749 .m(m) 10750 .n(n) 10751 .k(k) 10752 .iterations(1) 10753 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10754 } 10755 } 10756 } 10757 } 10758 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,n_gt_16)10759 
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16) {
  // n sweeps 17..31; for each n, k takes 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with a_stride set to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n sweeps 17..31, k takes 1, 10, 19, 28, 37, m sweeps 1..4; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n takes 32 and 48; for each n, k takes 1, 10, 19, 28, 37.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with a_stride set to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,n_div_16_subtile)10885 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_subtile) { 10886 TEST_REQUIRES_ARM_NEON; 10887 for (uint32_t n = 32; n <= 48; n += 16) { 10888 for (size_t k = 1; k <= 40; k += 9) { 10889 for (uint32_t m = 1; m <= 4; m++) { 10890 GemmMicrokernelTester() 10891 .mr(4) 10892 .nr(16) 10893 .kr(4) 10894 .sr(1) 10895 .m(m) 10896 .n(n) 10897 .k(k) 10898 .iterations(1) 10899 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10900 } 10901 } 10902 } 10903 } 10904 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,strided_cm_subtile)10905 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cm_subtile) { 10906 TEST_REQUIRES_ARM_NEON; 10907 for (size_t k = 1; k <= 40; k += 9) { 10908 for (uint32_t n = 1; n <= 16; n++) { 10909 for (uint32_t m = 1; m <= 4; m++) { 10910 GemmMicrokernelTester() 10911 .mr(4) 10912 .nr(16) 10913 .kr(4) 10914 .sr(1) 10915 .m(m) 10916 .n(n) 10917 .k(k) 10918 .cm_stride(19) 10919 .iterations(1) 10920 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10921 } 10922 } 10923 } 10924 } 10925 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,qmin)10926 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, qmin) { 10927 TEST_REQUIRES_ARM_NEON; 10928 GemmMicrokernelTester() 10929 .mr(4) 10930 .nr(16) 10931 .kr(4) 10932 .sr(1) 10933 .m(4) 10934 .n(16) 10935 .k(8) 10936 .qmin(128) 10937 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10938 } 10939 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,qmax)10940 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, qmax) { 10941 TEST_REQUIRES_ARM_NEON; 10942 GemmMicrokernelTester() 10943 .mr(4) 10944 .nr(16) 10945 .kr(4) 10946 .sr(1) 10947 .m(4) 10948 .n(16) 10949 .k(8) 10950 
.qmax(128) 10951 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10952 } 10953 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,strided_cm)10954 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cm) { 10955 TEST_REQUIRES_ARM_NEON; 10956 GemmMicrokernelTester() 10957 .mr(4) 10958 .nr(16) 10959 .kr(4) 10960 .sr(1) 10961 .m(4) 10962 .n(16) 10963 .k(8) 10964 .cm_stride(19) 10965 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10966 } 10967 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 10968 10969 10970 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_eq_16)10971 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_eq_16) { 10972 TEST_REQUIRES_ARM_NEON; 10973 GemmMicrokernelTester() 10974 .mr(1) 10975 .nr(16) 10976 .kr(4) 10977 .sr(1) 10978 .m(1) 10979 .n(16) 10980 .k(16) 10981 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10982 } 10983 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,strided_cn)10984 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, strided_cn) { 10985 TEST_REQUIRES_ARM_NEON; 10986 GemmMicrokernelTester() 10987 .mr(1) 10988 .nr(16) 10989 .kr(4) 10990 .sr(1) 10991 .m(1) 10992 .n(16) 10993 .k(16) 10994 .cn_stride(19) 10995 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10996 } 10997 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_eq_16_strided_a)10998 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_eq_16_strided_a) { 10999 TEST_REQUIRES_ARM_NEON; 11000 GemmMicrokernelTester() 11001 .mr(1) 11002 .nr(16) 11003 .kr(4) 11004 .sr(1) 11005 .m(1) 11006 .n(16) 11007 .k(16) 11008 .a_stride(19) 11009 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11010 } 11011 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_eq_16_subtile)11012 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_eq_16_subtile) { 11013 TEST_REQUIRES_ARM_NEON; 11014 for (uint32_t n = 1; n <= 16; n++) { 11015 for (uint32_t m = 1; m <= 1; m++) { 11016 GemmMicrokernelTester() 11017 .mr(1) 11018 .nr(16) 11019 .kr(4) 11020 .sr(1) 11021 .m(m) 11022 .n(n) 11023 .k(16) 11024 .iterations(1) 11025 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11026 } 11027 } 11028 } 11029 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_eq_16_subtile_m)11030 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 11031 TEST_REQUIRES_ARM_NEON; 11032 for (uint32_t m = 1; m <= 1; m++) { 11033 GemmMicrokernelTester() 11034 .mr(1) 11035 .nr(16) 11036 .kr(4) 11037 .sr(1) 11038 .m(m) 11039 .n(16) 11040 .k(16) 11041 .iterations(1) 11042 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11043 } 11044 } 11045 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_eq_16_subtile_n)11046 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) { 11047 TEST_REQUIRES_ARM_NEON; 11048 for (uint32_t n = 1; n <= 16; n++) { 11049 GemmMicrokernelTester() 11050 .mr(1) 11051 .nr(16) 11052 .kr(4) 11053 .sr(1) 11054 .m(1) 11055 .n(n) 11056 .k(16) 11057 .iterations(1) 11058 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11059 } 11060 } 11061 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_lt_16)11062 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_lt_16) { 11063 TEST_REQUIRES_ARM_NEON; 11064 for (size_t k = 1; k < 16; k++) { 11065 GemmMicrokernelTester() 11066 .mr(1) 11067 .nr(16) 11068 .kr(4) 11069 .sr(1) 11070 .m(1) 11071 
.n(16) 11072 .k(k) 11073 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11074 } 11075 } 11076 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_lt_16_strided_a)11077 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_lt_16_strided_a) { 11078 TEST_REQUIRES_ARM_NEON; 11079 for (size_t k = 1; k < 16; k++) { 11080 GemmMicrokernelTester() 11081 .mr(1) 11082 .nr(16) 11083 .kr(4) 11084 .sr(1) 11085 .m(1) 11086 .n(16) 11087 .k(k) 11088 .a_stride(19) 11089 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11090 } 11091 } 11092 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_lt_16_subtile)11093 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_lt_16_subtile) { 11094 TEST_REQUIRES_ARM_NEON; 11095 for (size_t k = 1; k < 16; k++) { 11096 for (uint32_t n = 1; n <= 16; n++) { 11097 for (uint32_t m = 1; m <= 1; m++) { 11098 GemmMicrokernelTester() 11099 .mr(1) 11100 .nr(16) 11101 .kr(4) 11102 .sr(1) 11103 .m(m) 11104 .n(n) 11105 .k(k) 11106 .iterations(1) 11107 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11108 } 11109 } 11110 } 11111 } 11112 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_gt_16)11113 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_gt_16) { 11114 TEST_REQUIRES_ARM_NEON; 11115 for (size_t k = 17; k < 32; k++) { 11116 GemmMicrokernelTester() 11117 .mr(1) 11118 .nr(16) 11119 .kr(4) 11120 .sr(1) 11121 .m(1) 11122 .n(16) 11123 .k(k) 11124 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11125 } 11126 } 11127 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_gt_16_strided_a)11128 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_gt_16_strided_a) { 11129 TEST_REQUIRES_ARM_NEON; 11130 for (size_t k = 17; 
k < 32; k++) { 11131 GemmMicrokernelTester() 11132 .mr(1) 11133 .nr(16) 11134 .kr(4) 11135 .sr(1) 11136 .m(1) 11137 .n(16) 11138 .k(k) 11139 .a_stride(37) 11140 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11141 } 11142 } 11143 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_gt_16_subtile)11144 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_gt_16_subtile) { 11145 TEST_REQUIRES_ARM_NEON; 11146 for (size_t k = 17; k < 32; k++) { 11147 for (uint32_t n = 1; n <= 16; n++) { 11148 for (uint32_t m = 1; m <= 1; m++) { 11149 GemmMicrokernelTester() 11150 .mr(1) 11151 .nr(16) 11152 .kr(4) 11153 .sr(1) 11154 .m(m) 11155 .n(n) 11156 .k(k) 11157 .iterations(1) 11158 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11159 } 11160 } 11161 } 11162 } 11163 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_div_16)11164 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_div_16) { 11165 TEST_REQUIRES_ARM_NEON; 11166 for (size_t k = 32; k <= 160; k += 16) { 11167 GemmMicrokernelTester() 11168 .mr(1) 11169 .nr(16) 11170 .kr(4) 11171 .sr(1) 11172 .m(1) 11173 .n(16) 11174 .k(k) 11175 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11176 } 11177 } 11178 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_div_16_strided_a)11179 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_div_16_strided_a) { 11180 TEST_REQUIRES_ARM_NEON; 11181 for (size_t k = 32; k <= 160; k += 16) { 11182 GemmMicrokernelTester() 11183 .mr(1) 11184 .nr(16) 11185 .kr(4) 11186 .sr(1) 11187 .m(1) 11188 .n(16) 11189 .k(k) 11190 .a_stride(163) 11191 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11192 } 11193 } 11194 
// k from 32 to 160 in steps of 16, every n in 1..16 and m in 1..1; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1, every n in 17..31, k from 1 to 80 in steps of 17.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 1, every n in 17..31, k from 1 to 80 in steps of 17, with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 1, every n in 17..31, k from 1 to 80 in steps of 17, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every n in 17..31, k from 1 to 80 in steps of 17, m in 1..1; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1, n in {32, 48}, k from 1 to 80 in steps of 17.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 1, n in {32, 48}, k from 1 to 80 in steps of 17, with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 1, n in {32, 48}, k from 1 to 80 in steps of 17, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {32, 48}, k from 1 to 80 in steps of 17, m in 1..1; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Every m in 1..1 and n in 1..16 with cm_stride(19), k from 1 to 80 in steps of 17.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, qmin) { 11383 TEST_REQUIRES_ARM_NEON; 11384 GemmMicrokernelTester() 11385 .mr(1) 11386 .nr(16) 11387 .kr(4) 11388 .sr(1) 11389 .m(1) 11390 .n(16) 11391 .k(16) 11392 .qmin(128) 11393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11394 } 11395 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,qmax)11396 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, qmax) { 11397 TEST_REQUIRES_ARM_NEON; 11398 GemmMicrokernelTester() 11399 .mr(1) 11400 .nr(16) 11401 .kr(4) 11402 .sr(1) 11403 .m(1) 11404 .n(16) 11405 .k(16) 11406 .qmax(128) 11407 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11408 } 11409 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,strided_cm)11410 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, strided_cm) { 11411 TEST_REQUIRES_ARM_NEON; 11412 GemmMicrokernelTester() 11413 .mr(1) 11414 .nr(16) 11415 .kr(4) 11416 .sr(1) 11417 .m(1) 11418 .n(16) 11419 .k(16) 11420 .cm_stride(19) 11421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11422 } 11423 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 11424 11425 11426 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_eq_16)11427 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_eq_16) { 11428 TEST_REQUIRES_ARM_NEON; 11429 GemmMicrokernelTester() 11430 .mr(3) 11431 .nr(16) 11432 .kr(4) 11433 .sr(1) 11434 .m(3) 11435 .n(16) 11436 .k(16) 11437 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11438 } 11439 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,strided_cn)11440 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, strided_cn) { 11441 TEST_REQUIRES_ARM_NEON; 11442 
GemmMicrokernelTester() 11443 .mr(3) 11444 .nr(16) 11445 .kr(4) 11446 .sr(1) 11447 .m(3) 11448 .n(16) 11449 .k(16) 11450 .cn_stride(19) 11451 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11452 } 11453 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_eq_16_strided_a)11454 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_eq_16_strided_a) { 11455 TEST_REQUIRES_ARM_NEON; 11456 GemmMicrokernelTester() 11457 .mr(3) 11458 .nr(16) 11459 .kr(4) 11460 .sr(1) 11461 .m(3) 11462 .n(16) 11463 .k(16) 11464 .a_stride(19) 11465 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11466 } 11467 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_eq_16_subtile)11468 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_eq_16_subtile) { 11469 TEST_REQUIRES_ARM_NEON; 11470 for (uint32_t n = 1; n <= 16; n++) { 11471 for (uint32_t m = 1; m <= 3; m++) { 11472 GemmMicrokernelTester() 11473 .mr(3) 11474 .nr(16) 11475 .kr(4) 11476 .sr(1) 11477 .m(m) 11478 .n(n) 11479 .k(16) 11480 .iterations(1) 11481 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11482 } 11483 } 11484 } 11485 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_eq_16_subtile_m)11486 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 11487 TEST_REQUIRES_ARM_NEON; 11488 for (uint32_t m = 1; m <= 3; m++) { 11489 GemmMicrokernelTester() 11490 .mr(3) 11491 .nr(16) 11492 .kr(4) 11493 .sr(1) 11494 .m(m) 11495 .n(16) 11496 .k(16) 11497 .iterations(1) 11498 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11499 } 11500 } 11501 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_eq_16_subtile_n)11502 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) 
{ 11503 TEST_REQUIRES_ARM_NEON; 11504 for (uint32_t n = 1; n <= 16; n++) { 11505 GemmMicrokernelTester() 11506 .mr(3) 11507 .nr(16) 11508 .kr(4) 11509 .sr(1) 11510 .m(3) 11511 .n(n) 11512 .k(16) 11513 .iterations(1) 11514 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11515 } 11516 } 11517 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_lt_16)11518 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_lt_16) { 11519 TEST_REQUIRES_ARM_NEON; 11520 for (size_t k = 1; k < 16; k++) { 11521 GemmMicrokernelTester() 11522 .mr(3) 11523 .nr(16) 11524 .kr(4) 11525 .sr(1) 11526 .m(3) 11527 .n(16) 11528 .k(k) 11529 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11530 } 11531 } 11532 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_lt_16_strided_a)11533 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_lt_16_strided_a) { 11534 TEST_REQUIRES_ARM_NEON; 11535 for (size_t k = 1; k < 16; k++) { 11536 GemmMicrokernelTester() 11537 .mr(3) 11538 .nr(16) 11539 .kr(4) 11540 .sr(1) 11541 .m(3) 11542 .n(16) 11543 .k(k) 11544 .a_stride(19) 11545 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11546 } 11547 } 11548 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_lt_16_subtile)11549 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_lt_16_subtile) { 11550 TEST_REQUIRES_ARM_NEON; 11551 for (size_t k = 1; k < 16; k++) { 11552 for (uint32_t n = 1; n <= 16; n++) { 11553 for (uint32_t m = 1; m <= 3; m++) { 11554 GemmMicrokernelTester() 11555 .mr(3) 11556 .nr(16) 11557 .kr(4) 11558 .sr(1) 11559 .m(m) 11560 .n(n) 11561 .k(k) 11562 .iterations(1) 11563 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11564 } 11565 } 11566 } 11567 } 11568 
// Full 3x16 tile for every k in 17..31.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(4).sr(1)
      .m(3).n(16).k(k_dim)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 3x16 tile for every k in 17..31 with a_stride(37).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(4).sr(1)
      .m(3).n(16).k(k_dim)
      .a_stride(37)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every k in 17..31, n in 1..16 and m in 1..3; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 3x16 tile for k from 32 to 160 in steps of 16.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(4).sr(1)
      .m(3).n(16).k(k_dim)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 3x16 tile for k from 32 to 160 in steps of 16 with a_stride(163).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(4).sr(1)
      .m(3).n(16).k(k_dim)
      .a_stride(163)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k from 32 to 160 in steps of 16, every n in 1..16 and m in 1..3; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 3, every n in 17..31, k from 1 to 80 in steps of 17.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(4).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 3, every n in 17..31, k from 1 to 80 in steps of 17, with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(4).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 3, every n in 17..31, k from 1 to 80 in steps of 17, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(4).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every n in 17..31, k from 1 to 80 in steps of 17, m in 1..3; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 3, n in {32, 48}, k from 1 to 80 in steps of 17.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(4).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 3, n in {32, 48}, k from 1 to 80 in steps of 17, with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(4).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 3, n in {32, 48}, k from 1 to 80 in steps of 17, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(4).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {32, 48}, k from 1 to 80 in steps of 17, m in 1..3; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,strided_cm_subtile)11817 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, strided_cm_subtile) { 11818 TEST_REQUIRES_ARM_NEON; 11819 for (size_t k = 1; k <= 80; k += 17) { 11820 for (uint32_t n = 1; n <= 16; n++) { 11821 for (uint32_t m = 1; m <= 3; m++) { 11822 GemmMicrokernelTester() 11823 .mr(3) 11824 .nr(16) 11825 .kr(4) 11826 .sr(1) 11827 .m(m) 11828 .n(n) 11829 .k(k) 11830 .cm_stride(19) 11831 .iterations(1) 11832 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11833 } 11834 } 11835 } 11836 } 11837 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,qmin)11838 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, qmin) { 11839 TEST_REQUIRES_ARM_NEON; 11840 GemmMicrokernelTester() 11841 .mr(3) 11842 .nr(16) 11843 .kr(4) 11844 .sr(1) 11845 .m(3) 11846 .n(16) 11847 .k(16) 11848 .qmin(128) 11849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11850 } 11851 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,qmax)11852 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, qmax) { 11853 TEST_REQUIRES_ARM_NEON; 11854 GemmMicrokernelTester() 11855 .mr(3) 11856 .nr(16) 11857 .kr(4) 11858 .sr(1) 11859 .m(3) 11860 .n(16) 11861 .k(16) 11862 .qmax(128) 11863 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11864 } 11865 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,strided_cm)11866 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, strided_cm) { 11867 TEST_REQUIRES_ARM_NEON; 11868 GemmMicrokernelTester() 11869 .mr(3) 11870 .nr(16) 11871 .kr(4) 11872 .sr(1) 11873 .m(3) 11874 .n(16) 11875 .k(16) 11876 .cm_stride(19) 11877 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11878 } 11879 #endif 
// XNN_ARCH_ARM || XNN_ARCH_ARM64 11880 11881 11882 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_eq_16)11883 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16) { 11884 TEST_REQUIRES_ARM_NEON; 11885 GemmMicrokernelTester() 11886 .mr(4) 11887 .nr(16) 11888 .kr(4) 11889 .sr(1) 11890 .m(4) 11891 .n(16) 11892 .k(16) 11893 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11894 } 11895 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,strided_cn)11896 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cn) { 11897 TEST_REQUIRES_ARM_NEON; 11898 GemmMicrokernelTester() 11899 .mr(4) 11900 .nr(16) 11901 .kr(4) 11902 .sr(1) 11903 .m(4) 11904 .n(16) 11905 .k(16) 11906 .cn_stride(19) 11907 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11908 } 11909 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_eq_16_strided_a)11910 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_strided_a) { 11911 TEST_REQUIRES_ARM_NEON; 11912 GemmMicrokernelTester() 11913 .mr(4) 11914 .nr(16) 11915 .kr(4) 11916 .sr(1) 11917 .m(4) 11918 .n(16) 11919 .k(16) 11920 .a_stride(19) 11921 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11922 } 11923 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_eq_16_subtile)11924 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile) { 11925 TEST_REQUIRES_ARM_NEON; 11926 for (uint32_t n = 1; n <= 16; n++) { 11927 for (uint32_t m = 1; m <= 4; m++) { 11928 GemmMicrokernelTester() 11929 .mr(4) 11930 .nr(16) 11931 .kr(4) 11932 .sr(1) 11933 .m(m) 11934 .n(n) 11935 .k(16) 11936 .iterations(1) 11937 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11938 } 
11939 } 11940 } 11941 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_eq_16_subtile_m)11942 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 11943 TEST_REQUIRES_ARM_NEON; 11944 for (uint32_t m = 1; m <= 4; m++) { 11945 GemmMicrokernelTester() 11946 .mr(4) 11947 .nr(16) 11948 .kr(4) 11949 .sr(1) 11950 .m(m) 11951 .n(16) 11952 .k(16) 11953 .iterations(1) 11954 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11955 } 11956 } 11957 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_eq_16_subtile_n)11958 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) { 11959 TEST_REQUIRES_ARM_NEON; 11960 for (uint32_t n = 1; n <= 16; n++) { 11961 GemmMicrokernelTester() 11962 .mr(4) 11963 .nr(16) 11964 .kr(4) 11965 .sr(1) 11966 .m(4) 11967 .n(n) 11968 .k(16) 11969 .iterations(1) 11970 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11971 } 11972 } 11973 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_lt_16)11974 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_lt_16) { 11975 TEST_REQUIRES_ARM_NEON; 11976 for (size_t k = 1; k < 16; k++) { 11977 GemmMicrokernelTester() 11978 .mr(4) 11979 .nr(16) 11980 .kr(4) 11981 .sr(1) 11982 .m(4) 11983 .n(16) 11984 .k(k) 11985 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11986 } 11987 } 11988 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_lt_16_strided_a)11989 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_lt_16_strided_a) { 11990 TEST_REQUIRES_ARM_NEON; 11991 for (size_t k = 1; k < 16; k++) { 11992 GemmMicrokernelTester() 11993 .mr(4) 11994 .nr(16) 11995 .kr(4) 11996 .sr(1) 11997 .m(4) 11998 .n(16) 11999 .k(k) 12000 .a_stride(19) 12001 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12002 } 12003 } 12004 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_lt_16_subtile)12005 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_lt_16_subtile) { 12006 TEST_REQUIRES_ARM_NEON; 12007 for (size_t k = 1; k < 16; k++) { 12008 for (uint32_t n = 1; n <= 16; n++) { 12009 for (uint32_t m = 1; m <= 4; m++) { 12010 GemmMicrokernelTester() 12011 .mr(4) 12012 .nr(16) 12013 .kr(4) 12014 .sr(1) 12015 .m(m) 12016 .n(n) 12017 .k(k) 12018 .iterations(1) 12019 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12020 } 12021 } 12022 } 12023 } 12024 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_gt_16)12025 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_gt_16) { 12026 TEST_REQUIRES_ARM_NEON; 12027 for (size_t k = 17; k < 32; k++) { 12028 GemmMicrokernelTester() 12029 .mr(4) 12030 .nr(16) 12031 .kr(4) 12032 .sr(1) 12033 .m(4) 12034 .n(16) 12035 .k(k) 12036 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12037 } 12038 } 12039 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_gt_16_strided_a)12040 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_gt_16_strided_a) { 12041 TEST_REQUIRES_ARM_NEON; 12042 for (size_t k = 17; k < 32; k++) { 12043 GemmMicrokernelTester() 12044 .mr(4) 12045 .nr(16) 12046 .kr(4) 12047 .sr(1) 12048 .m(4) 12049 .n(16) 12050 .k(k) 12051 .a_stride(37) 12052 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12053 } 12054 } 12055 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_gt_16_subtile)12056 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_gt_16_subtile) { 12057 TEST_REQUIRES_ARM_NEON; 12058 for (size_t k = 17; k < 32; k++) { 12059 for (uint32_t n = 1; n <= 16; n++) { 12060 for (uint32_t m = 1; m <= 4; 
m++) { 12061 GemmMicrokernelTester() 12062 .mr(4) 12063 .nr(16) 12064 .kr(4) 12065 .sr(1) 12066 .m(m) 12067 .n(n) 12068 .k(k) 12069 .iterations(1) 12070 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12071 } 12072 } 12073 } 12074 } 12075 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_div_16)12076 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_div_16) { 12077 TEST_REQUIRES_ARM_NEON; 12078 for (size_t k = 32; k <= 160; k += 16) { 12079 GemmMicrokernelTester() 12080 .mr(4) 12081 .nr(16) 12082 .kr(4) 12083 .sr(1) 12084 .m(4) 12085 .n(16) 12086 .k(k) 12087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12088 } 12089 } 12090 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_div_16_strided_a)12091 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_div_16_strided_a) { 12092 TEST_REQUIRES_ARM_NEON; 12093 for (size_t k = 32; k <= 160; k += 16) { 12094 GemmMicrokernelTester() 12095 .mr(4) 12096 .nr(16) 12097 .kr(4) 12098 .sr(1) 12099 .m(4) 12100 .n(16) 12101 .k(k) 12102 .a_stride(163) 12103 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12104 } 12105 } 12106 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_div_16_subtile)12107 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_div_16_subtile) { 12108 TEST_REQUIRES_ARM_NEON; 12109 for (size_t k = 32; k <= 160; k += 16) { 12110 for (uint32_t n = 1; n <= 16; n++) { 12111 for (uint32_t m = 1; m <= 4; m++) { 12112 GemmMicrokernelTester() 12113 .mr(4) 12114 .nr(16) 12115 .kr(4) 12116 .sr(1) 12117 .m(m) 12118 .n(n) 12119 .k(k) 12120 .iterations(1) 12121 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12122 } 12123 } 12124 } 12125 } 12126 
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,n_gt_16)12127 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16) { 12128 TEST_REQUIRES_ARM_NEON; 12129 for (uint32_t n = 17; n < 32; n++) { 12130 for (size_t k = 1; k <= 80; k += 17) { 12131 GemmMicrokernelTester() 12132 .mr(4) 12133 .nr(16) 12134 .kr(4) 12135 .sr(1) 12136 .m(4) 12137 .n(n) 12138 .k(k) 12139 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12140 } 12141 } 12142 } 12143 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,n_gt_16_strided_cn)12144 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_strided_cn) { 12145 TEST_REQUIRES_ARM_NEON; 12146 for (uint32_t n = 17; n < 32; n++) { 12147 for (size_t k = 1; k <= 80; k += 17) { 12148 GemmMicrokernelTester() 12149 .mr(4) 12150 .nr(16) 12151 .kr(4) 12152 .sr(1) 12153 .m(4) 12154 .n(n) 12155 .k(k) 12156 .cn_stride(19) 12157 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12158 } 12159 } 12160 } 12161 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,n_gt_16_strided_a)12162 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_strided_a) { 12163 TEST_REQUIRES_ARM_NEON; 12164 for (uint32_t n = 17; n < 32; n++) { 12165 for (size_t k = 1; k <= 80; k += 17) { 12166 GemmMicrokernelTester() 12167 .mr(4) 12168 .nr(16) 12169 .kr(4) 12170 .sr(1) 12171 .m(4) 12172 .n(n) 12173 .k(k) 12174 .a_stride(83) 12175 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12176 } 12177 } 12178 } 12179 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,n_gt_16_subtile)12180 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_subtile) { 12181 TEST_REQUIRES_ARM_NEON; 12182 for (uint32_t n = 17; n < 32; n++) { 12183 for (size_t k = 1; k <= 80; k += 17) { 12184 for (uint32_t m = 1; m <= 4; m++) { 12185 
GemmMicrokernelTester() 12186 .mr(4) 12187 .nr(16) 12188 .kr(4) 12189 .sr(1) 12190 .m(m) 12191 .n(n) 12192 .k(k) 12193 .iterations(1) 12194 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12195 } 12196 } 12197 } 12198 } 12199 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,n_div_16)12200 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16) { 12201 TEST_REQUIRES_ARM_NEON; 12202 for (uint32_t n = 32; n <= 48; n += 16) { 12203 for (size_t k = 1; k <= 80; k += 17) { 12204 GemmMicrokernelTester() 12205 .mr(4) 12206 .nr(16) 12207 .kr(4) 12208 .sr(1) 12209 .m(4) 12210 .n(n) 12211 .k(k) 12212 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12213 } 12214 } 12215 } 12216 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,n_div_16_strided_cn)12217 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_strided_cn) { 12218 TEST_REQUIRES_ARM_NEON; 12219 for (uint32_t n = 32; n <= 48; n += 16) { 12220 for (size_t k = 1; k <= 80; k += 17) { 12221 GemmMicrokernelTester() 12222 .mr(4) 12223 .nr(16) 12224 .kr(4) 12225 .sr(1) 12226 .m(4) 12227 .n(n) 12228 .k(k) 12229 .cn_stride(19) 12230 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12231 } 12232 } 12233 } 12234 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,n_div_16_strided_a)12235 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_strided_a) { 12236 TEST_REQUIRES_ARM_NEON; 12237 for (uint32_t n = 32; n <= 48; n += 16) { 12238 for (size_t k = 1; k <= 80; k += 17) { 12239 GemmMicrokernelTester() 12240 .mr(4) 12241 .nr(16) 12242 .kr(4) 12243 .sr(1) 12244 .m(4) 12245 .n(n) 12246 .k(k) 12247 .a_stride(83) 12248 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12249 } 
12250 } 12251 } 12252 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,n_div_16_subtile)12253 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_subtile) { 12254 TEST_REQUIRES_ARM_NEON; 12255 for (uint32_t n = 32; n <= 48; n += 16) { 12256 for (size_t k = 1; k <= 80; k += 17) { 12257 for (uint32_t m = 1; m <= 4; m++) { 12258 GemmMicrokernelTester() 12259 .mr(4) 12260 .nr(16) 12261 .kr(4) 12262 .sr(1) 12263 .m(m) 12264 .n(n) 12265 .k(k) 12266 .iterations(1) 12267 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12268 } 12269 } 12270 } 12271 } 12272 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,strided_cm_subtile)12273 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cm_subtile) { 12274 TEST_REQUIRES_ARM_NEON; 12275 for (size_t k = 1; k <= 80; k += 17) { 12276 for (uint32_t n = 1; n <= 16; n++) { 12277 for (uint32_t m = 1; m <= 4; m++) { 12278 GemmMicrokernelTester() 12279 .mr(4) 12280 .nr(16) 12281 .kr(4) 12282 .sr(1) 12283 .m(m) 12284 .n(n) 12285 .k(k) 12286 .cm_stride(19) 12287 .iterations(1) 12288 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12289 } 12290 } 12291 } 12292 } 12293 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,qmin)12294 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, qmin) { 12295 TEST_REQUIRES_ARM_NEON; 12296 GemmMicrokernelTester() 12297 .mr(4) 12298 .nr(16) 12299 .kr(4) 12300 .sr(1) 12301 .m(4) 12302 .n(16) 12303 .k(16) 12304 .qmin(128) 12305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12306 } 12307 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,qmax)12308 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, qmax) { 12309 TEST_REQUIRES_ARM_NEON; 12310 GemmMicrokernelTester() 12311 .mr(4) 12312 .nr(16) 12313 .kr(4) 12314 .sr(1) 12315 .m(4) 12316 .n(16) 
12317 .k(16) 12318 .qmax(128) 12319 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12320 } 12321 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,strided_cm)12322 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cm) { 12323 TEST_REQUIRES_ARM_NEON; 12324 GemmMicrokernelTester() 12325 .mr(4) 12326 .nr(16) 12327 .kr(4) 12328 .sr(1) 12329 .m(4) 12330 .n(16) 12331 .k(16) 12332 .cm_stride(19) 12333 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12334 } 12335 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 12336 12337 12338 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_eq_8)12339 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8) { 12340 TEST_REQUIRES_ARM_NEON; 12341 GemmMicrokernelTester() 12342 .mr(1) 12343 .nr(8) 12344 .kr(4) 12345 .sr(1) 12346 .m(1) 12347 .n(8) 12348 .k(8) 12349 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12350 } 12351 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,strided_cn)12352 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cn) { 12353 TEST_REQUIRES_ARM_NEON; 12354 GemmMicrokernelTester() 12355 .mr(1) 12356 .nr(8) 12357 .kr(4) 12358 .sr(1) 12359 .m(1) 12360 .n(8) 12361 .k(8) 12362 .cn_stride(11) 12363 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12364 } 12365 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_eq_8_strided_a)12366 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_strided_a) { 12367 TEST_REQUIRES_ARM_NEON; 12368 GemmMicrokernelTester() 12369 .mr(1) 12370 .nr(8) 12371 .kr(4) 12372 .sr(1) 12373 .m(1) 12374 .n(8) 12375 .k(8) 12376 .a_stride(11) 12377 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12378 } 12379 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_eq_8_subtile)12380 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile) { 12381 TEST_REQUIRES_ARM_NEON; 12382 for (uint32_t n = 1; n <= 8; n++) { 12383 for (uint32_t m = 1; m <= 1; m++) { 12384 GemmMicrokernelTester() 12385 .mr(1) 12386 .nr(8) 12387 .kr(4) 12388 .sr(1) 12389 .m(m) 12390 .n(n) 12391 .k(8) 12392 .iterations(1) 12393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12394 } 12395 } 12396 } 12397 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_eq_8_subtile_m)12398 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_m) { 12399 TEST_REQUIRES_ARM_NEON; 12400 for (uint32_t m = 1; m <= 1; m++) { 12401 GemmMicrokernelTester() 12402 .mr(1) 12403 .nr(8) 12404 .kr(4) 12405 .sr(1) 12406 .m(m) 12407 .n(8) 12408 .k(8) 12409 .iterations(1) 12410 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12411 } 12412 } 12413 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_eq_8_subtile_n)12414 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_n) { 12415 TEST_REQUIRES_ARM_NEON; 12416 for (uint32_t n = 1; n <= 8; n++) { 12417 GemmMicrokernelTester() 12418 .mr(1) 12419 .nr(8) 12420 .kr(4) 12421 .sr(1) 12422 .m(1) 12423 .n(n) 12424 .k(8) 12425 .iterations(1) 12426 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12427 } 12428 } 12429 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_lt_8)12430 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8) { 12431 TEST_REQUIRES_ARM_NEON; 12432 for (size_t k = 1; k < 8; k++) { 12433 GemmMicrokernelTester() 12434 .mr(1) 12435 .nr(8) 12436 .kr(4) 12437 .sr(1) 12438 .m(1) 12439 .n(8) 12440 .k(k) 12441 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12442 } 12443 } 12444 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_lt_8_strided_a)12445 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8_strided_a) { 12446 TEST_REQUIRES_ARM_NEON; 12447 for (size_t k = 1; k < 8; k++) { 12448 GemmMicrokernelTester() 12449 .mr(1) 12450 .nr(8) 12451 .kr(4) 12452 .sr(1) 12453 .m(1) 12454 .n(8) 12455 .k(k) 12456 .a_stride(11) 12457 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12458 } 12459 } 12460 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_lt_8_subtile)12461 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8_subtile) { 12462 TEST_REQUIRES_ARM_NEON; 12463 for (size_t k = 1; k < 8; k++) { 12464 for (uint32_t n = 1; n <= 8; n++) { 12465 for (uint32_t m = 1; m <= 1; m++) { 12466 GemmMicrokernelTester() 12467 .mr(1) 12468 .nr(8) 12469 .kr(4) 12470 .sr(1) 12471 .m(m) 12472 .n(n) 12473 .k(k) 12474 .iterations(1) 12475 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12476 } 12477 } 12478 } 12479 } 12480 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_gt_8)12481 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8) { 12482 TEST_REQUIRES_ARM_NEON; 12483 for (size_t k = 9; k < 16; k++) { 12484 GemmMicrokernelTester() 12485 .mr(1) 12486 .nr(8) 12487 .kr(4) 12488 .sr(1) 12489 .m(1) 12490 .n(8) 12491 .k(k) 12492 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12493 } 12494 } 12495 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_gt_8_strided_a)12496 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8_strided_a) { 12497 TEST_REQUIRES_ARM_NEON; 12498 for (size_t k = 9; k < 16; k++) { 12499 GemmMicrokernelTester() 12500 
.mr(1) 12501 .nr(8) 12502 .kr(4) 12503 .sr(1) 12504 .m(1) 12505 .n(8) 12506 .k(k) 12507 .a_stride(19) 12508 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12509 } 12510 } 12511 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_gt_8_subtile)12512 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8_subtile) { 12513 TEST_REQUIRES_ARM_NEON; 12514 for (size_t k = 9; k < 16; k++) { 12515 for (uint32_t n = 1; n <= 8; n++) { 12516 for (uint32_t m = 1; m <= 1; m++) { 12517 GemmMicrokernelTester() 12518 .mr(1) 12519 .nr(8) 12520 .kr(4) 12521 .sr(1) 12522 .m(m) 12523 .n(n) 12524 .k(k) 12525 .iterations(1) 12526 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12527 } 12528 } 12529 } 12530 } 12531 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_div_8)12532 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8) { 12533 TEST_REQUIRES_ARM_NEON; 12534 for (size_t k = 16; k <= 80; k += 8) { 12535 GemmMicrokernelTester() 12536 .mr(1) 12537 .nr(8) 12538 .kr(4) 12539 .sr(1) 12540 .m(1) 12541 .n(8) 12542 .k(k) 12543 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12544 } 12545 } 12546 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_div_8_strided_a)12547 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8_strided_a) { 12548 TEST_REQUIRES_ARM_NEON; 12549 for (size_t k = 16; k <= 80; k += 8) { 12550 GemmMicrokernelTester() 12551 .mr(1) 12552 .nr(8) 12553 .kr(4) 12554 .sr(1) 12555 .m(1) 12556 .n(8) 12557 .k(k) 12558 .a_stride(83) 12559 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12560 } 12561 } 12562 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_div_8_subtile)12563 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, 
k_div_8_subtile) { 12564 TEST_REQUIRES_ARM_NEON; 12565 for (size_t k = 16; k <= 80; k += 8) { 12566 for (uint32_t n = 1; n <= 8; n++) { 12567 for (uint32_t m = 1; m <= 1; m++) { 12568 GemmMicrokernelTester() 12569 .mr(1) 12570 .nr(8) 12571 .kr(4) 12572 .sr(1) 12573 .m(m) 12574 .n(n) 12575 .k(k) 12576 .iterations(1) 12577 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12578 } 12579 } 12580 } 12581 } 12582 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_gt_8)12583 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8) { 12584 TEST_REQUIRES_ARM_NEON; 12585 for (uint32_t n = 9; n < 16; n++) { 12586 for (size_t k = 1; k <= 40; k += 9) { 12587 GemmMicrokernelTester() 12588 .mr(1) 12589 .nr(8) 12590 .kr(4) 12591 .sr(1) 12592 .m(1) 12593 .n(n) 12594 .k(k) 12595 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12596 } 12597 } 12598 } 12599 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_gt_8_strided_cn)12600 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_strided_cn) { 12601 TEST_REQUIRES_ARM_NEON; 12602 for (uint32_t n = 9; n < 16; n++) { 12603 for (size_t k = 1; k <= 40; k += 9) { 12604 GemmMicrokernelTester() 12605 .mr(1) 12606 .nr(8) 12607 .kr(4) 12608 .sr(1) 12609 .m(1) 12610 .n(n) 12611 .k(k) 12612 .cn_stride(11) 12613 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12614 } 12615 } 12616 } 12617 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_gt_8_strided_a)12618 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_strided_a) { 12619 TEST_REQUIRES_ARM_NEON; 12620 for (uint32_t n = 9; n < 16; n++) { 12621 for (size_t k = 1; k <= 40; k += 9) { 12622 GemmMicrokernelTester() 12623 .mr(1) 12624 .nr(8) 12625 .kr(4) 12626 .sr(1) 12627 .m(1) 12628 .n(n) 12629 .k(k) 12630 .a_stride(43) 12631 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12632 } 12633 } 12634 } 12635 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_gt_8_subtile)12636 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_subtile) { 12637 TEST_REQUIRES_ARM_NEON; 12638 for (uint32_t n = 9; n < 16; n++) { 12639 for (size_t k = 1; k <= 40; k += 9) { 12640 for (uint32_t m = 1; m <= 1; m++) { 12641 GemmMicrokernelTester() 12642 .mr(1) 12643 .nr(8) 12644 .kr(4) 12645 .sr(1) 12646 .m(m) 12647 .n(n) 12648 .k(k) 12649 .iterations(1) 12650 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12651 } 12652 } 12653 } 12654 } 12655 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_div_8)12656 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8) { 12657 TEST_REQUIRES_ARM_NEON; 12658 for (uint32_t n = 16; n <= 24; n += 8) { 12659 for (size_t k = 1; k <= 40; k += 9) { 12660 GemmMicrokernelTester() 12661 .mr(1) 12662 .nr(8) 12663 .kr(4) 12664 .sr(1) 12665 .m(1) 12666 .n(n) 12667 .k(k) 12668 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12669 } 12670 } 12671 } 12672 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_div_8_strided_cn)12673 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_strided_cn) { 12674 TEST_REQUIRES_ARM_NEON; 12675 for (uint32_t n = 16; n <= 24; n += 8) { 12676 for (size_t k = 1; k <= 40; k += 9) { 12677 GemmMicrokernelTester() 12678 .mr(1) 12679 .nr(8) 12680 .kr(4) 12681 .sr(1) 12682 .m(1) 12683 .n(n) 12684 .k(k) 12685 .cn_stride(11) 12686 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12687 } 12688 } 12689 } 12690 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_div_8_strided_a)12691 
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_strided_a) { 12692 TEST_REQUIRES_ARM_NEON; 12693 for (uint32_t n = 16; n <= 24; n += 8) { 12694 for (size_t k = 1; k <= 40; k += 9) { 12695 GemmMicrokernelTester() 12696 .mr(1) 12697 .nr(8) 12698 .kr(4) 12699 .sr(1) 12700 .m(1) 12701 .n(n) 12702 .k(k) 12703 .a_stride(43) 12704 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12705 } 12706 } 12707 } 12708 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_div_8_subtile)12709 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_subtile) { 12710 TEST_REQUIRES_ARM_NEON; 12711 for (uint32_t n = 16; n <= 24; n += 8) { 12712 for (size_t k = 1; k <= 40; k += 9) { 12713 for (uint32_t m = 1; m <= 1; m++) { 12714 GemmMicrokernelTester() 12715 .mr(1) 12716 .nr(8) 12717 .kr(4) 12718 .sr(1) 12719 .m(m) 12720 .n(n) 12721 .k(k) 12722 .iterations(1) 12723 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12724 } 12725 } 12726 } 12727 } 12728 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,strided_cm_subtile)12729 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm_subtile) { 12730 TEST_REQUIRES_ARM_NEON; 12731 for (size_t k = 1; k <= 40; k += 9) { 12732 for (uint32_t n = 1; n <= 8; n++) { 12733 for (uint32_t m = 1; m <= 1; m++) { 12734 GemmMicrokernelTester() 12735 .mr(1) 12736 .nr(8) 12737 .kr(4) 12738 .sr(1) 12739 .m(m) 12740 .n(n) 12741 .k(k) 12742 .cm_stride(11) 12743 .iterations(1) 12744 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12745 } 12746 } 12747 } 12748 } 12749 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,qmin)12750 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmin) { 12751 TEST_REQUIRES_ARM_NEON; 12752 GemmMicrokernelTester() 12753 .mr(1) 12754 .nr(8) 12755 .kr(4) 12756 .sr(1) 12757 
.m(1) 12758 .n(8) 12759 .k(8) 12760 .qmin(128) 12761 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12762 } 12763 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,qmax)12764 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmax) { 12765 TEST_REQUIRES_ARM_NEON; 12766 GemmMicrokernelTester() 12767 .mr(1) 12768 .nr(8) 12769 .kr(4) 12770 .sr(1) 12771 .m(1) 12772 .n(8) 12773 .k(8) 12774 .qmax(128) 12775 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12776 } 12777 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,strided_cm)12778 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm) { 12779 TEST_REQUIRES_ARM_NEON; 12780 GemmMicrokernelTester() 12781 .mr(1) 12782 .nr(8) 12783 .kr(4) 12784 .sr(1) 12785 .m(1) 12786 .n(8) 12787 .k(8) 12788 .cm_stride(11) 12789 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12790 } 12791 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 12792 12793 12794 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_eq_8)12795 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_eq_8) { 12796 TEST_REQUIRES_ARM_NEON; 12797 GemmMicrokernelTester() 12798 .mr(4) 12799 .nr(8) 12800 .kr(4) 12801 .sr(1) 12802 .m(4) 12803 .n(8) 12804 .k(8) 12805 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12806 } 12807 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,strided_cn)12808 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, strided_cn) { 12809 TEST_REQUIRES_ARM_NEON; 12810 GemmMicrokernelTester() 12811 .mr(4) 12812 .nr(8) 12813 .kr(4) 12814 .sr(1) 12815 .m(4) 12816 .n(8) 12817 .k(8) 12818 .cn_stride(11) 12819 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12820 } 12821 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_eq_8_strided_a)12822 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_eq_8_strided_a) { 12823 TEST_REQUIRES_ARM_NEON; 12824 GemmMicrokernelTester() 12825 .mr(4) 12826 .nr(8) 12827 .kr(4) 12828 .sr(1) 12829 .m(4) 12830 .n(8) 12831 .k(8) 12832 .a_stride(11) 12833 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12834 } 12835 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_eq_8_subtile)12836 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_eq_8_subtile) { 12837 TEST_REQUIRES_ARM_NEON; 12838 for (uint32_t n = 1; n <= 8; n++) { 12839 for (uint32_t m = 1; m <= 4; m++) { 12840 GemmMicrokernelTester() 12841 .mr(4) 12842 .nr(8) 12843 .kr(4) 12844 .sr(1) 12845 .m(m) 12846 .n(n) 12847 .k(8) 12848 .iterations(1) 12849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12850 } 12851 } 12852 } 12853 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_eq_8_subtile_m)12854 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_eq_8_subtile_m) { 12855 TEST_REQUIRES_ARM_NEON; 12856 for (uint32_t m = 1; m <= 4; m++) { 12857 GemmMicrokernelTester() 12858 .mr(4) 12859 .nr(8) 12860 .kr(4) 12861 .sr(1) 12862 .m(m) 12863 .n(8) 12864 .k(8) 12865 .iterations(1) 12866 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12867 } 12868 } 12869 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_eq_8_subtile_n)12870 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_eq_8_subtile_n) { 12871 TEST_REQUIRES_ARM_NEON; 12872 for (uint32_t n = 1; n <= 8; n++) { 12873 GemmMicrokernelTester() 12874 .mr(4) 12875 .nr(8) 12876 .kr(4) 12877 .sr(1) 12878 .m(4) 12879 .n(n) 12880 .k(8) 12881 .iterations(1) 12882 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12883 } 12884 } 12885 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_lt_8)12886 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_lt_8) { 12887 TEST_REQUIRES_ARM_NEON; 12888 for (size_t k = 1; k < 8; k++) { 12889 GemmMicrokernelTester() 12890 .mr(4) 12891 .nr(8) 12892 .kr(4) 12893 .sr(1) 12894 .m(4) 12895 .n(8) 12896 .k(k) 12897 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12898 } 12899 } 12900 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_lt_8_strided_a)12901 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_lt_8_strided_a) { 12902 TEST_REQUIRES_ARM_NEON; 12903 for (size_t k = 1; k < 8; k++) { 12904 GemmMicrokernelTester() 12905 .mr(4) 12906 .nr(8) 12907 .kr(4) 12908 .sr(1) 12909 .m(4) 12910 .n(8) 12911 .k(k) 12912 .a_stride(11) 12913 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12914 } 12915 } 12916 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_lt_8_subtile)12917 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_lt_8_subtile) { 12918 TEST_REQUIRES_ARM_NEON; 12919 for (size_t k = 1; k < 8; k++) { 12920 for (uint32_t n = 1; n <= 8; n++) { 12921 for (uint32_t m = 1; m <= 4; m++) { 12922 GemmMicrokernelTester() 12923 .mr(4) 12924 .nr(8) 12925 .kr(4) 12926 .sr(1) 12927 .m(m) 12928 .n(n) 12929 .k(k) 12930 .iterations(1) 12931 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12932 } 12933 } 12934 } 12935 } 12936 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_gt_8)12937 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_gt_8) { 12938 TEST_REQUIRES_ARM_NEON; 12939 for (size_t k = 9; k < 16; k++) { 12940 GemmMicrokernelTester() 12941 .mr(4) 12942 .nr(8) 12943 
.kr(4) 12944 .sr(1) 12945 .m(4) 12946 .n(8) 12947 .k(k) 12948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12949 } 12950 } 12951 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_gt_8_strided_a)12952 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_gt_8_strided_a) { 12953 TEST_REQUIRES_ARM_NEON; 12954 for (size_t k = 9; k < 16; k++) { 12955 GemmMicrokernelTester() 12956 .mr(4) 12957 .nr(8) 12958 .kr(4) 12959 .sr(1) 12960 .m(4) 12961 .n(8) 12962 .k(k) 12963 .a_stride(19) 12964 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12965 } 12966 } 12967 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_gt_8_subtile)12968 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_gt_8_subtile) { 12969 TEST_REQUIRES_ARM_NEON; 12970 for (size_t k = 9; k < 16; k++) { 12971 for (uint32_t n = 1; n <= 8; n++) { 12972 for (uint32_t m = 1; m <= 4; m++) { 12973 GemmMicrokernelTester() 12974 .mr(4) 12975 .nr(8) 12976 .kr(4) 12977 .sr(1) 12978 .m(m) 12979 .n(n) 12980 .k(k) 12981 .iterations(1) 12982 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12983 } 12984 } 12985 } 12986 } 12987 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_div_8)12988 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_div_8) { 12989 TEST_REQUIRES_ARM_NEON; 12990 for (size_t k = 16; k <= 80; k += 8) { 12991 GemmMicrokernelTester() 12992 .mr(4) 12993 .nr(8) 12994 .kr(4) 12995 .sr(1) 12996 .m(4) 12997 .n(8) 12998 .k(k) 12999 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13000 } 13001 } 13002 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_div_8_strided_a)13003 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_div_8_strided_a) { 13004 TEST_REQUIRES_ARM_NEON; 13005 
for (size_t k = 16; k <= 80; k += 8) { 13006 GemmMicrokernelTester() 13007 .mr(4) 13008 .nr(8) 13009 .kr(4) 13010 .sr(1) 13011 .m(4) 13012 .n(8) 13013 .k(k) 13014 .a_stride(83) 13015 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13016 } 13017 } 13018 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_div_8_subtile)13019 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_div_8_subtile) { 13020 TEST_REQUIRES_ARM_NEON; 13021 for (size_t k = 16; k <= 80; k += 8) { 13022 for (uint32_t n = 1; n <= 8; n++) { 13023 for (uint32_t m = 1; m <= 4; m++) { 13024 GemmMicrokernelTester() 13025 .mr(4) 13026 .nr(8) 13027 .kr(4) 13028 .sr(1) 13029 .m(m) 13030 .n(n) 13031 .k(k) 13032 .iterations(1) 13033 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13034 } 13035 } 13036 } 13037 } 13038 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_gt_8)13039 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_gt_8) { 13040 TEST_REQUIRES_ARM_NEON; 13041 for (uint32_t n = 9; n < 16; n++) { 13042 for (size_t k = 1; k <= 40; k += 9) { 13043 GemmMicrokernelTester() 13044 .mr(4) 13045 .nr(8) 13046 .kr(4) 13047 .sr(1) 13048 .m(4) 13049 .n(n) 13050 .k(k) 13051 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13052 } 13053 } 13054 } 13055 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_gt_8_strided_cn)13056 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_gt_8_strided_cn) { 13057 TEST_REQUIRES_ARM_NEON; 13058 for (uint32_t n = 9; n < 16; n++) { 13059 for (size_t k = 1; k <= 40; k += 9) { 13060 GemmMicrokernelTester() 13061 .mr(4) 13062 .nr(8) 13063 .kr(4) 13064 .sr(1) 13065 .m(4) 13066 .n(n) 13067 .k(k) 13068 .cn_stride(11) 13069 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 13070 } 13071 } 13072 } 13073 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_gt_8_strided_a)13074 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_gt_8_strided_a) { 13075 TEST_REQUIRES_ARM_NEON; 13076 for (uint32_t n = 9; n < 16; n++) { 13077 for (size_t k = 1; k <= 40; k += 9) { 13078 GemmMicrokernelTester() 13079 .mr(4) 13080 .nr(8) 13081 .kr(4) 13082 .sr(1) 13083 .m(4) 13084 .n(n) 13085 .k(k) 13086 .a_stride(43) 13087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13088 } 13089 } 13090 } 13091 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_gt_8_subtile)13092 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_gt_8_subtile) { 13093 TEST_REQUIRES_ARM_NEON; 13094 for (uint32_t n = 9; n < 16; n++) { 13095 for (size_t k = 1; k <= 40; k += 9) { 13096 for (uint32_t m = 1; m <= 4; m++) { 13097 GemmMicrokernelTester() 13098 .mr(4) 13099 .nr(8) 13100 .kr(4) 13101 .sr(1) 13102 .m(m) 13103 .n(n) 13104 .k(k) 13105 .iterations(1) 13106 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13107 } 13108 } 13109 } 13110 } 13111 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_div_8)13112 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_div_8) { 13113 TEST_REQUIRES_ARM_NEON; 13114 for (uint32_t n = 16; n <= 24; n += 8) { 13115 for (size_t k = 1; k <= 40; k += 9) { 13116 GemmMicrokernelTester() 13117 .mr(4) 13118 .nr(8) 13119 .kr(4) 13120 .sr(1) 13121 .m(4) 13122 .n(n) 13123 .k(k) 13124 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13125 } 13126 } 13127 } 13128 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_div_8_strided_cn)13129 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_div_8_strided_cn) { 13130 TEST_REQUIRES_ARM_NEON; 13131 for (uint32_t n = 16; n <= 24; n += 8) { 13132 
for (size_t k = 1; k <= 40; k += 9) { 13133 GemmMicrokernelTester() 13134 .mr(4) 13135 .nr(8) 13136 .kr(4) 13137 .sr(1) 13138 .m(4) 13139 .n(n) 13140 .k(k) 13141 .cn_stride(11) 13142 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13143 } 13144 } 13145 } 13146 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_div_8_strided_a)13147 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_div_8_strided_a) { 13148 TEST_REQUIRES_ARM_NEON; 13149 for (uint32_t n = 16; n <= 24; n += 8) { 13150 for (size_t k = 1; k <= 40; k += 9) { 13151 GemmMicrokernelTester() 13152 .mr(4) 13153 .nr(8) 13154 .kr(4) 13155 .sr(1) 13156 .m(4) 13157 .n(n) 13158 .k(k) 13159 .a_stride(43) 13160 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13161 } 13162 } 13163 } 13164 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_div_8_subtile)13165 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_div_8_subtile) { 13166 TEST_REQUIRES_ARM_NEON; 13167 for (uint32_t n = 16; n <= 24; n += 8) { 13168 for (size_t k = 1; k <= 40; k += 9) { 13169 for (uint32_t m = 1; m <= 4; m++) { 13170 GemmMicrokernelTester() 13171 .mr(4) 13172 .nr(8) 13173 .kr(4) 13174 .sr(1) 13175 .m(m) 13176 .n(n) 13177 .k(k) 13178 .iterations(1) 13179 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13180 } 13181 } 13182 } 13183 } 13184 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,strided_cm_subtile)13185 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, strided_cm_subtile) { 13186 TEST_REQUIRES_ARM_NEON; 13187 for (size_t k = 1; k <= 40; k += 9) { 13188 for (uint32_t n = 1; n <= 8; n++) { 13189 for (uint32_t m = 1; m <= 4; m++) { 13190 GemmMicrokernelTester() 13191 .mr(4) 13192 .nr(8) 13193 .kr(4) 13194 .sr(1) 13195 .m(m) 13196 .n(n) 13197 .k(k) 13198 .cm_stride(11) 13199 
.iterations(1) 13200 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13201 } 13202 } 13203 } 13204 } 13205 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,qmin)13206 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, qmin) { 13207 TEST_REQUIRES_ARM_NEON; 13208 GemmMicrokernelTester() 13209 .mr(4) 13210 .nr(8) 13211 .kr(4) 13212 .sr(1) 13213 .m(4) 13214 .n(8) 13215 .k(8) 13216 .qmin(128) 13217 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13218 } 13219 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,qmax)13220 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, qmax) { 13221 TEST_REQUIRES_ARM_NEON; 13222 GemmMicrokernelTester() 13223 .mr(4) 13224 .nr(8) 13225 .kr(4) 13226 .sr(1) 13227 .m(4) 13228 .n(8) 13229 .k(8) 13230 .qmax(128) 13231 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13232 } 13233 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,strided_cm)13234 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, strided_cm) { 13235 TEST_REQUIRES_ARM_NEON; 13236 GemmMicrokernelTester() 13237 .mr(4) 13238 .nr(8) 13239 .kr(4) 13240 .sr(1) 13241 .m(4) 13242 .n(8) 13243 .k(8) 13244 .cm_stride(11) 13245 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13246 } 13247 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 13248 13249 13250 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_eq_16)13251 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16) { 13252 TEST_REQUIRES_ARM_NEON; 13253 GemmMicrokernelTester() 13254 .mr(1) 13255 .nr(8) 13256 .kr(4) 13257 .sr(1) 13258 .m(1) 13259 .n(8) 13260 .k(16) 13261 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 13262 } 13263 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,strided_cn)13264 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cn) { 13265 TEST_REQUIRES_ARM_NEON; 13266 GemmMicrokernelTester() 13267 .mr(1) 13268 .nr(8) 13269 .kr(4) 13270 .sr(1) 13271 .m(1) 13272 .n(8) 13273 .k(16) 13274 .cn_stride(11) 13275 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13276 } 13277 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_eq_16_strided_a)13278 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_strided_a) { 13279 TEST_REQUIRES_ARM_NEON; 13280 GemmMicrokernelTester() 13281 .mr(1) 13282 .nr(8) 13283 .kr(4) 13284 .sr(1) 13285 .m(1) 13286 .n(8) 13287 .k(16) 13288 .a_stride(19) 13289 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13290 } 13291 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_eq_16_subtile)13292 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) { 13293 TEST_REQUIRES_ARM_NEON; 13294 for (uint32_t n = 1; n <= 8; n++) { 13295 for (uint32_t m = 1; m <= 1; m++) { 13296 GemmMicrokernelTester() 13297 .mr(1) 13298 .nr(8) 13299 .kr(4) 13300 .sr(1) 13301 .m(m) 13302 .n(n) 13303 .k(16) 13304 .iterations(1) 13305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13306 } 13307 } 13308 } 13309 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_eq_16_subtile_m)13310 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 13311 TEST_REQUIRES_ARM_NEON; 13312 for (uint32_t m = 1; m <= 1; m++) { 13313 GemmMicrokernelTester() 13314 .mr(1) 13315 .nr(8) 13316 .kr(4) 13317 .sr(1) 13318 .m(m) 13319 .n(8) 13320 .k(16) 13321 .iterations(1) 13322 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 13323 } 13324 } 13325 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_eq_16_subtile_n)13326 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 13327 TEST_REQUIRES_ARM_NEON; 13328 for (uint32_t n = 1; n <= 8; n++) { 13329 GemmMicrokernelTester() 13330 .mr(1) 13331 .nr(8) 13332 .kr(4) 13333 .sr(1) 13334 .m(1) 13335 .n(n) 13336 .k(16) 13337 .iterations(1) 13338 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13339 } 13340 } 13341 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_lt_16)13342 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16) { 13343 TEST_REQUIRES_ARM_NEON; 13344 for (size_t k = 1; k < 16; k++) { 13345 GemmMicrokernelTester() 13346 .mr(1) 13347 .nr(8) 13348 .kr(4) 13349 .sr(1) 13350 .m(1) 13351 .n(8) 13352 .k(k) 13353 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13354 } 13355 } 13356 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_lt_16_strided_a)13357 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16_strided_a) { 13358 TEST_REQUIRES_ARM_NEON; 13359 for (size_t k = 1; k < 16; k++) { 13360 GemmMicrokernelTester() 13361 .mr(1) 13362 .nr(8) 13363 .kr(4) 13364 .sr(1) 13365 .m(1) 13366 .n(8) 13367 .k(k) 13368 .a_stride(19) 13369 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13370 } 13371 } 13372 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_lt_16_subtile)13373 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) { 13374 TEST_REQUIRES_ARM_NEON; 13375 for (size_t k = 1; k < 16; k++) { 13376 for (uint32_t n = 1; n <= 8; n++) { 13377 for (uint32_t m = 1; m <= 1; m++) { 13378 GemmMicrokernelTester() 13379 .mr(1) 13380 .nr(8) 13381 .kr(4) 13382 .sr(1) 13383 .m(m) 13384 .n(n) 13385 .k(k) 13386 .iterations(1) 13387 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13388 } 13389 } 13390 } 13391 } 13392 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_gt_16)13393 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16) { 13394 TEST_REQUIRES_ARM_NEON; 13395 for (size_t k = 17; k < 32; k++) { 13396 GemmMicrokernelTester() 13397 .mr(1) 13398 .nr(8) 13399 .kr(4) 13400 .sr(1) 13401 .m(1) 13402 .n(8) 13403 .k(k) 13404 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13405 } 13406 } 13407 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_gt_16_strided_a)13408 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16_strided_a) { 13409 TEST_REQUIRES_ARM_NEON; 13410 for (size_t k = 17; k < 32; k++) { 13411 GemmMicrokernelTester() 13412 .mr(1) 13413 .nr(8) 13414 .kr(4) 13415 .sr(1) 13416 .m(1) 13417 .n(8) 13418 .k(k) 13419 .a_stride(37) 13420 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13421 } 13422 } 13423 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_gt_16_subtile)13424 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) { 13425 TEST_REQUIRES_ARM_NEON; 13426 for (size_t k = 17; k < 32; k++) { 13427 for (uint32_t n = 1; n <= 8; n++) { 13428 for (uint32_t m = 1; m <= 1; m++) { 13429 GemmMicrokernelTester() 13430 .mr(1) 13431 .nr(8) 13432 .kr(4) 13433 .sr(1) 13434 .m(m) 13435 .n(n) 13436 .k(k) 13437 .iterations(1) 13438 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13439 } 13440 } 13441 } 13442 } 13443 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_div_16)13444 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16) { 13445 TEST_REQUIRES_ARM_NEON; 13446 for (size_t k = 32; k <= 160; k += 16) { 13447 
GemmMicrokernelTester() 13448 .mr(1) 13449 .nr(8) 13450 .kr(4) 13451 .sr(1) 13452 .m(1) 13453 .n(8) 13454 .k(k) 13455 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13456 } 13457 } 13458 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_div_16_strided_a)13459 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16_strided_a) { 13460 TEST_REQUIRES_ARM_NEON; 13461 for (size_t k = 32; k <= 160; k += 16) { 13462 GemmMicrokernelTester() 13463 .mr(1) 13464 .nr(8) 13465 .kr(4) 13466 .sr(1) 13467 .m(1) 13468 .n(8) 13469 .k(k) 13470 .a_stride(163) 13471 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13472 } 13473 } 13474 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_div_16_subtile)13475 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) { 13476 TEST_REQUIRES_ARM_NEON; 13477 for (size_t k = 32; k <= 160; k += 16) { 13478 for (uint32_t n = 1; n <= 8; n++) { 13479 for (uint32_t m = 1; m <= 1; m++) { 13480 GemmMicrokernelTester() 13481 .mr(1) 13482 .nr(8) 13483 .kr(4) 13484 .sr(1) 13485 .m(m) 13486 .n(n) 13487 .k(k) 13488 .iterations(1) 13489 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13490 } 13491 } 13492 } 13493 } 13494 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_gt_8)13495 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8) { 13496 TEST_REQUIRES_ARM_NEON; 13497 for (uint32_t n = 9; n < 16; n++) { 13498 for (size_t k = 1; k <= 80; k += 17) { 13499 GemmMicrokernelTester() 13500 .mr(1) 13501 .nr(8) 13502 .kr(4) 13503 .sr(1) 13504 .m(1) 13505 .n(n) 13506 .k(k) 13507 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13508 } 13509 } 13510 } 13511 
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_gt_8_strided_cn)13512 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) { 13513 TEST_REQUIRES_ARM_NEON; 13514 for (uint32_t n = 9; n < 16; n++) { 13515 for (size_t k = 1; k <= 80; k += 17) { 13516 GemmMicrokernelTester() 13517 .mr(1) 13518 .nr(8) 13519 .kr(4) 13520 .sr(1) 13521 .m(1) 13522 .n(n) 13523 .k(k) 13524 .cn_stride(11) 13525 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13526 } 13527 } 13528 } 13529 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_gt_8_strided_a)13530 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_a) { 13531 TEST_REQUIRES_ARM_NEON; 13532 for (uint32_t n = 9; n < 16; n++) { 13533 for (size_t k = 1; k <= 80; k += 17) { 13534 GemmMicrokernelTester() 13535 .mr(1) 13536 .nr(8) 13537 .kr(4) 13538 .sr(1) 13539 .m(1) 13540 .n(n) 13541 .k(k) 13542 .a_stride(83) 13543 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13544 } 13545 } 13546 } 13547 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_gt_8_subtile)13548 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) { 13549 TEST_REQUIRES_ARM_NEON; 13550 for (uint32_t n = 9; n < 16; n++) { 13551 for (size_t k = 1; k <= 80; k += 17) { 13552 for (uint32_t m = 1; m <= 1; m++) { 13553 GemmMicrokernelTester() 13554 .mr(1) 13555 .nr(8) 13556 .kr(4) 13557 .sr(1) 13558 .m(m) 13559 .n(n) 13560 .k(k) 13561 .iterations(1) 13562 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13563 } 13564 } 13565 } 13566 } 13567 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_div_8)13568 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8) { 13569 TEST_REQUIRES_ARM_NEON; 13570 for (uint32_t n = 16; n <= 24; n += 8) { 13571 for (size_t k = 1; k <= 80; k += 17) { 
13572 GemmMicrokernelTester() 13573 .mr(1) 13574 .nr(8) 13575 .kr(4) 13576 .sr(1) 13577 .m(1) 13578 .n(n) 13579 .k(k) 13580 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13581 } 13582 } 13583 } 13584 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_div_8_strided_cn)13585 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) { 13586 TEST_REQUIRES_ARM_NEON; 13587 for (uint32_t n = 16; n <= 24; n += 8) { 13588 for (size_t k = 1; k <= 80; k += 17) { 13589 GemmMicrokernelTester() 13590 .mr(1) 13591 .nr(8) 13592 .kr(4) 13593 .sr(1) 13594 .m(1) 13595 .n(n) 13596 .k(k) 13597 .cn_stride(11) 13598 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13599 } 13600 } 13601 } 13602 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_div_8_strided_a)13603 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_a) { 13604 TEST_REQUIRES_ARM_NEON; 13605 for (uint32_t n = 16; n <= 24; n += 8) { 13606 for (size_t k = 1; k <= 80; k += 17) { 13607 GemmMicrokernelTester() 13608 .mr(1) 13609 .nr(8) 13610 .kr(4) 13611 .sr(1) 13612 .m(1) 13613 .n(n) 13614 .k(k) 13615 .a_stride(83) 13616 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13617 } 13618 } 13619 } 13620 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_div_8_subtile)13621 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) { 13622 TEST_REQUIRES_ARM_NEON; 13623 for (uint32_t n = 16; n <= 24; n += 8) { 13624 for (size_t k = 1; k <= 80; k += 17) { 13625 for (uint32_t m = 1; m <= 1; m++) { 13626 GemmMicrokernelTester() 13627 .mr(1) 13628 .nr(8) 13629 .kr(4) 13630 .sr(1) 13631 .m(m) 13632 .n(n) 13633 .k(k) 13634 .iterations(1) 13635 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 13636 } 13637 } 13638 } 13639 } 13640 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,strided_cm_subtile)13641 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) { 13642 TEST_REQUIRES_ARM_NEON; 13643 for (size_t k = 1; k <= 80; k += 17) { 13644 for (uint32_t n = 1; n <= 8; n++) { 13645 for (uint32_t m = 1; m <= 1; m++) { 13646 GemmMicrokernelTester() 13647 .mr(1) 13648 .nr(8) 13649 .kr(4) 13650 .sr(1) 13651 .m(m) 13652 .n(n) 13653 .k(k) 13654 .cm_stride(11) 13655 .iterations(1) 13656 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13657 } 13658 } 13659 } 13660 } 13661 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,qmin)13662 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmin) { 13663 TEST_REQUIRES_ARM_NEON; 13664 GemmMicrokernelTester() 13665 .mr(1) 13666 .nr(8) 13667 .kr(4) 13668 .sr(1) 13669 .m(1) 13670 .n(8) 13671 .k(16) 13672 .qmin(128) 13673 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13674 } 13675 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,qmax)13676 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmax) { 13677 TEST_REQUIRES_ARM_NEON; 13678 GemmMicrokernelTester() 13679 .mr(1) 13680 .nr(8) 13681 .kr(4) 13682 .sr(1) 13683 .m(1) 13684 .n(8) 13685 .k(16) 13686 .qmax(128) 13687 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13688 } 13689 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,strided_cm)13690 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm) { 13691 TEST_REQUIRES_ARM_NEON; 13692 GemmMicrokernelTester() 13693 .mr(1) 13694 .nr(8) 13695 .kr(4) 13696 .sr(1) 13697 .m(1) 13698 .n(8) 13699 .k(16) 13700 .cm_stride(11) 13701 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 13702 } 13703 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 13704 13705 13706 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_eq_16)13707 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_eq_16) { 13708 TEST_REQUIRES_ARM_NEON; 13709 GemmMicrokernelTester() 13710 .mr(3) 13711 .nr(8) 13712 .kr(4) 13713 .sr(1) 13714 .m(3) 13715 .n(8) 13716 .k(16) 13717 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13718 } 13719 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,strided_cn)13720 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, strided_cn) { 13721 TEST_REQUIRES_ARM_NEON; 13722 GemmMicrokernelTester() 13723 .mr(3) 13724 .nr(8) 13725 .kr(4) 13726 .sr(1) 13727 .m(3) 13728 .n(8) 13729 .k(16) 13730 .cn_stride(11) 13731 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13732 } 13733 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_eq_16_strided_a)13734 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_eq_16_strided_a) { 13735 TEST_REQUIRES_ARM_NEON; 13736 GemmMicrokernelTester() 13737 .mr(3) 13738 .nr(8) 13739 .kr(4) 13740 .sr(1) 13741 .m(3) 13742 .n(8) 13743 .k(16) 13744 .a_stride(19) 13745 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13746 } 13747 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_eq_16_subtile)13748 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) { 13749 TEST_REQUIRES_ARM_NEON; 13750 for (uint32_t n = 1; n <= 8; n++) { 13751 for (uint32_t m = 1; m <= 3; m++) { 13752 GemmMicrokernelTester() 13753 .mr(3) 13754 .nr(8) 13755 .kr(4) 13756 .sr(1) 13757 .m(m) 13758 .n(n) 13759 .k(16) 13760 .iterations(1) 13761 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 13762 } 13763 } 13764 } 13765 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_eq_16_subtile_m)13766 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 13767 TEST_REQUIRES_ARM_NEON; 13768 for (uint32_t m = 1; m <= 3; m++) { 13769 GemmMicrokernelTester() 13770 .mr(3) 13771 .nr(8) 13772 .kr(4) 13773 .sr(1) 13774 .m(m) 13775 .n(8) 13776 .k(16) 13777 .iterations(1) 13778 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13779 } 13780 } 13781 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_eq_16_subtile_n)13782 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 13783 TEST_REQUIRES_ARM_NEON; 13784 for (uint32_t n = 1; n <= 8; n++) { 13785 GemmMicrokernelTester() 13786 .mr(3) 13787 .nr(8) 13788 .kr(4) 13789 .sr(1) 13790 .m(3) 13791 .n(n) 13792 .k(16) 13793 .iterations(1) 13794 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13795 } 13796 } 13797 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_lt_16)13798 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_lt_16) { 13799 TEST_REQUIRES_ARM_NEON; 13800 for (size_t k = 1; k < 16; k++) { 13801 GemmMicrokernelTester() 13802 .mr(3) 13803 .nr(8) 13804 .kr(4) 13805 .sr(1) 13806 .m(3) 13807 .n(8) 13808 .k(k) 13809 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13810 } 13811 } 13812 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_lt_16_strided_a)13813 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_lt_16_strided_a) { 13814 TEST_REQUIRES_ARM_NEON; 13815 for (size_t k = 1; k < 16; k++) { 13816 GemmMicrokernelTester() 13817 .mr(3) 13818 .nr(8) 13819 .kr(4) 13820 .sr(1) 13821 .m(3) 13822 .n(8) 13823 .k(k) 13824 .a_stride(19) 13825 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13826 } 13827 } 13828 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_lt_16_subtile)13829 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) { 13830 TEST_REQUIRES_ARM_NEON; 13831 for (size_t k = 1; k < 16; k++) { 13832 for (uint32_t n = 1; n <= 8; n++) { 13833 for (uint32_t m = 1; m <= 3; m++) { 13834 GemmMicrokernelTester() 13835 .mr(3) 13836 .nr(8) 13837 .kr(4) 13838 .sr(1) 13839 .m(m) 13840 .n(n) 13841 .k(k) 13842 .iterations(1) 13843 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13844 } 13845 } 13846 } 13847 } 13848 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_gt_16)13849 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_gt_16) { 13850 TEST_REQUIRES_ARM_NEON; 13851 for (size_t k = 17; k < 32; k++) { 13852 GemmMicrokernelTester() 13853 .mr(3) 13854 .nr(8) 13855 .kr(4) 13856 .sr(1) 13857 .m(3) 13858 .n(8) 13859 .k(k) 13860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13861 } 13862 } 13863 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_gt_16_strided_a)13864 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_gt_16_strided_a) { 13865 TEST_REQUIRES_ARM_NEON; 13866 for (size_t k = 17; k < 32; k++) { 13867 GemmMicrokernelTester() 13868 .mr(3) 13869 .nr(8) 13870 .kr(4) 13871 .sr(1) 13872 .m(3) 13873 .n(8) 13874 .k(k) 13875 .a_stride(37) 13876 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13877 } 13878 } 13879 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_gt_16_subtile)13880 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) { 13881 TEST_REQUIRES_ARM_NEON; 13882 for (size_t k = 17; k < 32; k++) { 13883 for (uint32_t n = 1; n <= 8; n++) { 13884 for (uint32_t m = 1; m <= 3; m++) { 13885 
GemmMicrokernelTester() 13886 .mr(3) 13887 .nr(8) 13888 .kr(4) 13889 .sr(1) 13890 .m(m) 13891 .n(n) 13892 .k(k) 13893 .iterations(1) 13894 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13895 } 13896 } 13897 } 13898 } 13899 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_div_16)13900 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_div_16) { 13901 TEST_REQUIRES_ARM_NEON; 13902 for (size_t k = 32; k <= 160; k += 16) { 13903 GemmMicrokernelTester() 13904 .mr(3) 13905 .nr(8) 13906 .kr(4) 13907 .sr(1) 13908 .m(3) 13909 .n(8) 13910 .k(k) 13911 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13912 } 13913 } 13914 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_div_16_strided_a)13915 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_div_16_strided_a) { 13916 TEST_REQUIRES_ARM_NEON; 13917 for (size_t k = 32; k <= 160; k += 16) { 13918 GemmMicrokernelTester() 13919 .mr(3) 13920 .nr(8) 13921 .kr(4) 13922 .sr(1) 13923 .m(3) 13924 .n(8) 13925 .k(k) 13926 .a_stride(163) 13927 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13928 } 13929 } 13930 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_div_16_subtile)13931 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_div_16_subtile) { 13932 TEST_REQUIRES_ARM_NEON; 13933 for (size_t k = 32; k <= 160; k += 16) { 13934 for (uint32_t n = 1; n <= 8; n++) { 13935 for (uint32_t m = 1; m <= 3; m++) { 13936 GemmMicrokernelTester() 13937 .mr(3) 13938 .nr(8) 13939 .kr(4) 13940 .sr(1) 13941 .m(m) 13942 .n(n) 13943 .k(k) 13944 .iterations(1) 13945 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13946 } 13947 } 13948 } 13949 } 13950 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,n_gt_8)13951 
// Sweeps n over 9..15 and k over 1, 18, ..., 69 with m = 3.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over 9..15 and k over 1, 18, ..., 69 with m = 3 and cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over 9..15 and k over 1, 18, ..., 69 with m = 3 and a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over 9..15, k over 1, 18, ..., 69 and m over 1..3; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps n over 16, 24 and k over 1, 18, ..., 69 with m = 3.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over 16, 24 and k over 1, 18, ..., 69 with m = 3 and cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over 16, 24 and k over 1, 18, ..., 69 with m = 3 and a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_div_8_subtile) { 14078 TEST_REQUIRES_ARM_NEON; 14079 for (uint32_t n = 16; n <= 24; n += 8) { 14080 for (size_t k = 1; k <= 80; k += 17) { 14081 for (uint32_t m = 1; m <= 3; m++) { 14082 GemmMicrokernelTester() 14083 .mr(3) 14084 .nr(8) 14085 .kr(4) 14086 .sr(1) 14087 .m(m) 14088 .n(n) 14089 .k(k) 14090 .iterations(1) 14091 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14092 } 14093 } 14094 } 14095 } 14096 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,strided_cm_subtile)14097 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, strided_cm_subtile) { 14098 TEST_REQUIRES_ARM_NEON; 14099 for (size_t k = 1; k <= 80; k += 17) { 14100 for (uint32_t n = 1; n <= 8; n++) { 14101 for (uint32_t m = 1; m <= 3; m++) { 14102 GemmMicrokernelTester() 14103 .mr(3) 14104 .nr(8) 14105 .kr(4) 14106 .sr(1) 14107 .m(m) 14108 .n(n) 14109 .k(k) 14110 .cm_stride(11) 14111 .iterations(1) 14112 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14113 } 14114 } 14115 } 14116 } 14117 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,qmin)14118 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, qmin) { 14119 TEST_REQUIRES_ARM_NEON; 14120 GemmMicrokernelTester() 14121 .mr(3) 14122 .nr(8) 14123 .kr(4) 14124 .sr(1) 14125 .m(3) 14126 .n(8) 14127 .k(16) 14128 .qmin(128) 14129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14130 } 14131 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,qmax)14132 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, qmax) { 14133 TEST_REQUIRES_ARM_NEON; 14134 GemmMicrokernelTester() 14135 .mr(3) 14136 .nr(8) 14137 .kr(4) 14138 .sr(1) 14139 .m(3) 14140 .n(8) 14141 .k(16) 14142 .qmax(128) 14143 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14144 } 14145 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,strided_cm)14146 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, strided_cm) { 14147 TEST_REQUIRES_ARM_NEON; 14148 GemmMicrokernelTester() 14149 .mr(3) 14150 .nr(8) 14151 .kr(4) 14152 .sr(1) 14153 .m(3) 14154 .n(8) 14155 .k(16) 14156 .cm_stride(11) 14157 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14158 } 14159 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 14160 14161 14162 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_eq_16)14163 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) { 14164 TEST_REQUIRES_ARM_NEON; 14165 GemmMicrokernelTester() 14166 .mr(2) 14167 .nr(8) 14168 .kr(8) 14169 .sr(1) 14170 .m(2) 14171 .n(8) 14172 .k(16) 14173 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14174 } 14175 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,strided_cn)14176 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) { 14177 TEST_REQUIRES_ARM_NEON; 14178 GemmMicrokernelTester() 14179 .mr(2) 14180 .nr(8) 14181 .kr(8) 14182 .sr(1) 14183 .m(2) 14184 .n(8) 14185 .k(16) 14186 .cn_stride(11) 14187 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14188 } 14189 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_eq_16_strided_a)14190 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_strided_a) { 14191 TEST_REQUIRES_ARM_NEON; 14192 GemmMicrokernelTester() 14193 .mr(2) 14194 .nr(8) 14195 .kr(8) 14196 .sr(1) 14197 .m(2) 14198 .n(8) 14199 .k(16) 14200 .a_stride(19) 14201 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14202 } 14203 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_eq_16_subtile)14204 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) { 14205 TEST_REQUIRES_ARM_NEON; 14206 for (uint32_t n = 1; n <= 8; n++) { 14207 for (uint32_t m = 1; m <= 2; m++) { 14208 GemmMicrokernelTester() 14209 .mr(2) 14210 .nr(8) 14211 .kr(8) 14212 .sr(1) 14213 .m(m) 14214 .n(n) 14215 .k(16) 14216 .iterations(1) 14217 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14218 } 14219 } 14220 } 14221 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_eq_16_subtile_m)14222 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) { 14223 TEST_REQUIRES_ARM_NEON; 14224 for (uint32_t m = 1; m <= 2; m++) { 14225 GemmMicrokernelTester() 14226 .mr(2) 14227 .nr(8) 14228 .kr(8) 14229 .sr(1) 14230 .m(m) 14231 .n(8) 14232 .k(16) 14233 .iterations(1) 14234 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14235 } 14236 } 14237 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_eq_16_subtile_n)14238 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) { 14239 TEST_REQUIRES_ARM_NEON; 14240 for (uint32_t n = 1; n <= 8; n++) { 14241 GemmMicrokernelTester() 14242 .mr(2) 14243 .nr(8) 14244 .kr(8) 14245 .sr(1) 14246 .m(2) 14247 .n(n) 14248 .k(16) 14249 .iterations(1) 14250 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14251 } 14252 } 14253 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_lt_16)14254 
// Full 2x8 tile for every k in 1..15.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// Full 2x8 tile for every k in 1..15, with a_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k)
        .a_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k in 1..15 crossed with n in 1..8 and m in 1..2; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile for every k in 17..31.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// Full 2x8 tile for every k in 17..31, with a_stride set to 37.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k)
        .a_stride(37)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k in 17..31 crossed with n in 1..8 and m in 1..2; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_strided_a) { 14372 TEST_REQUIRES_ARM_NEON; 14373 for (size_t k = 32; k <= 160; k += 16) { 14374 GemmMicrokernelTester() 14375 .mr(2) 14376 .nr(8) 14377 .kr(8) 14378 .sr(1) 14379 .m(2) 14380 .n(8) 14381 .k(k) 14382 .a_stride(163) 14383 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14384 } 14385 } 14386 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_div_16_subtile)14387 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) { 14388 TEST_REQUIRES_ARM_NEON; 14389 for (size_t k = 32; k <= 160; k += 16) { 14390 for (uint32_t n = 1; n <= 8; n++) { 14391 for (uint32_t m = 1; m <= 2; m++) { 14392 GemmMicrokernelTester() 14393 .mr(2) 14394 .nr(8) 14395 .kr(8) 14396 .sr(1) 14397 .m(m) 14398 .n(n) 14399 .k(k) 14400 .iterations(1) 14401 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14402 } 14403 } 14404 } 14405 } 14406 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_gt_8)14407 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) { 14408 TEST_REQUIRES_ARM_NEON; 14409 for (uint32_t n = 9; n < 16; n++) { 14410 for (size_t k = 1; k <= 80; k += 17) { 14411 GemmMicrokernelTester() 14412 .mr(2) 14413 .nr(8) 14414 .kr(8) 14415 .sr(1) 14416 .m(2) 14417 .n(n) 14418 .k(k) 14419 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14420 } 14421 } 14422 } 14423 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_gt_8_strided_cn)14424 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) { 14425 TEST_REQUIRES_ARM_NEON; 14426 for (uint32_t n = 9; n < 16; n++) { 14427 for (size_t k = 1; k <= 80; k += 17) { 
14428 GemmMicrokernelTester() 14429 .mr(2) 14430 .nr(8) 14431 .kr(8) 14432 .sr(1) 14433 .m(2) 14434 .n(n) 14435 .k(k) 14436 .cn_stride(11) 14437 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14438 } 14439 } 14440 } 14441 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_gt_8_strided_a)14442 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_a) { 14443 TEST_REQUIRES_ARM_NEON; 14444 for (uint32_t n = 9; n < 16; n++) { 14445 for (size_t k = 1; k <= 80; k += 17) { 14446 GemmMicrokernelTester() 14447 .mr(2) 14448 .nr(8) 14449 .kr(8) 14450 .sr(1) 14451 .m(2) 14452 .n(n) 14453 .k(k) 14454 .a_stride(83) 14455 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14456 } 14457 } 14458 } 14459 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_gt_8_subtile)14460 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) { 14461 TEST_REQUIRES_ARM_NEON; 14462 for (uint32_t n = 9; n < 16; n++) { 14463 for (size_t k = 1; k <= 80; k += 17) { 14464 for (uint32_t m = 1; m <= 2; m++) { 14465 GemmMicrokernelTester() 14466 .mr(2) 14467 .nr(8) 14468 .kr(8) 14469 .sr(1) 14470 .m(m) 14471 .n(n) 14472 .k(k) 14473 .iterations(1) 14474 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14475 } 14476 } 14477 } 14478 } 14479 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_div_8)14480 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) { 14481 TEST_REQUIRES_ARM_NEON; 14482 for (uint32_t n = 16; n <= 24; n += 8) { 14483 for (size_t k = 1; k <= 80; k += 17) { 14484 GemmMicrokernelTester() 14485 .mr(2) 14486 .nr(8) 14487 .kr(8) 14488 .sr(1) 14489 .m(2) 14490 .n(n) 14491 .k(k) 14492 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14493 } 14494 } 14495 } 14496 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_div_8_strided_cn)14497 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) { 14498 TEST_REQUIRES_ARM_NEON; 14499 for (uint32_t n = 16; n <= 24; n += 8) { 14500 for (size_t k = 1; k <= 80; k += 17) { 14501 GemmMicrokernelTester() 14502 .mr(2) 14503 .nr(8) 14504 .kr(8) 14505 .sr(1) 14506 .m(2) 14507 .n(n) 14508 .k(k) 14509 .cn_stride(11) 14510 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14511 } 14512 } 14513 } 14514 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_div_8_strided_a)14515 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_a) { 14516 TEST_REQUIRES_ARM_NEON; 14517 for (uint32_t n = 16; n <= 24; n += 8) { 14518 for (size_t k = 1; k <= 80; k += 17) { 14519 GemmMicrokernelTester() 14520 .mr(2) 14521 .nr(8) 14522 .kr(8) 14523 .sr(1) 14524 .m(2) 14525 .n(n) 14526 .k(k) 14527 .a_stride(83) 14528 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14529 } 14530 } 14531 } 14532 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_div_8_subtile)14533 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) { 14534 TEST_REQUIRES_ARM_NEON; 14535 for (uint32_t n = 16; n <= 24; n += 8) { 14536 for (size_t k = 1; k <= 80; k += 17) { 14537 for (uint32_t m = 1; m <= 2; m++) { 14538 GemmMicrokernelTester() 14539 .mr(2) 14540 .nr(8) 14541 .kr(8) 14542 .sr(1) 14543 .m(m) 14544 .n(n) 14545 .k(k) 14546 .iterations(1) 14547 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14548 } 14549 } 14550 } 14551 } 14552 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,strided_cm_subtile)14553 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) { 14554 TEST_REQUIRES_ARM_NEON; 14555 for (size_t k = 1; k <= 80; k += 17) { 14556 for (uint32_t n = 1; n <= 8; n++) { 14557 for (uint32_t m = 1; m <= 2; m++) { 14558 GemmMicrokernelTester() 14559 .mr(2) 14560 .nr(8) 14561 .kr(8) 14562 .sr(1) 14563 .m(m) 14564 .n(n) 14565 .k(k) 14566 .cm_stride(11) 14567 .iterations(1) 14568 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14569 } 14570 } 14571 } 14572 } 14573 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,qmin)14574 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) { 14575 TEST_REQUIRES_ARM_NEON; 14576 GemmMicrokernelTester() 14577 .mr(2) 14578 .nr(8) 14579 .kr(8) 14580 .sr(1) 14581 .m(2) 14582 .n(8) 14583 .k(16) 14584 .qmin(128) 14585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14586 } 14587 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,qmax)14588 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) { 14589 TEST_REQUIRES_ARM_NEON; 14590 GemmMicrokernelTester() 14591 .mr(2) 14592 .nr(8) 14593 .kr(8) 14594 .sr(1) 14595 .m(2) 14596 .n(8) 14597 .k(16) 14598 .qmax(128) 14599 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14600 } 14601 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,strided_cm)14602 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) { 14603 TEST_REQUIRES_ARM_NEON; 14604 GemmMicrokernelTester() 14605 .mr(2) 14606 .nr(8) 
14607 .kr(8) 14608 .sr(1) 14609 .m(2) 14610 .n(8) 14611 .k(16) 14612 .cm_stride(11) 14613 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14614 } 14615 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 14616 14617 14618 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_eq_4)14619 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4) { 14620 TEST_REQUIRES_ARM_NEON_DOT; 14621 GemmMicrokernelTester() 14622 .mr(4) 14623 .nr(16) 14624 .kr(4) 14625 .sr(1) 14626 .m(4) 14627 .n(16) 14628 .k(4) 14629 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14630 } 14631 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,strided_cn)14632 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, strided_cn) { 14633 TEST_REQUIRES_ARM_NEON_DOT; 14634 GemmMicrokernelTester() 14635 .mr(4) 14636 .nr(16) 14637 .kr(4) 14638 .sr(1) 14639 .m(4) 14640 .n(16) 14641 .k(4) 14642 .cn_stride(19) 14643 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14644 } 14645 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_eq_4_strided_a)14646 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) { 14647 TEST_REQUIRES_ARM_NEON_DOT; 14648 GemmMicrokernelTester() 14649 .mr(4) 14650 .nr(16) 14651 .kr(4) 14652 .sr(1) 14653 .m(4) 14654 .n(16) 14655 .k(4) 14656 .a_stride(7) 14657 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14658 } 14659 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_eq_4_subtile)14660 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) { 14661 TEST_REQUIRES_ARM_NEON_DOT; 14662 for (uint32_t n = 1; n <= 
16; n++) { 14663 for (uint32_t m = 1; m <= 4; m++) { 14664 GemmMicrokernelTester() 14665 .mr(4) 14666 .nr(16) 14667 .kr(4) 14668 .sr(1) 14669 .m(m) 14670 .n(n) 14671 .k(4) 14672 .iterations(1) 14673 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14674 } 14675 } 14676 } 14677 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_eq_4_subtile_m)14678 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_m) { 14679 TEST_REQUIRES_ARM_NEON_DOT; 14680 for (uint32_t m = 1; m <= 4; m++) { 14681 GemmMicrokernelTester() 14682 .mr(4) 14683 .nr(16) 14684 .kr(4) 14685 .sr(1) 14686 .m(m) 14687 .n(16) 14688 .k(4) 14689 .iterations(1) 14690 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14691 } 14692 } 14693 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_eq_4_subtile_n)14694 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_n) { 14695 TEST_REQUIRES_ARM_NEON_DOT; 14696 for (uint32_t n = 1; n <= 16; n++) { 14697 GemmMicrokernelTester() 14698 .mr(4) 14699 .nr(16) 14700 .kr(4) 14701 .sr(1) 14702 .m(4) 14703 .n(n) 14704 .k(4) 14705 .iterations(1) 14706 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14707 } 14708 } 14709 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_lt_4)14710 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4) { 14711 TEST_REQUIRES_ARM_NEON_DOT; 14712 for (size_t k = 1; k < 4; k++) { 14713 GemmMicrokernelTester() 14714 .mr(4) 14715 .nr(16) 14716 .kr(4) 14717 .sr(1) 14718 .m(4) 14719 .n(16) 14720 .k(k) 14721 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14722 } 14723 } 14724 
// Full 4x16 tile for every k in 1..3, with a_stride set to 7.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 4; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k)
        .a_stride(7)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k in 1..3 crossed with n in 1..16 and m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 4; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x16 tile for every k in 5..7.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// Full 4x16 tile for every k in 5..7, with a_stride set to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k)
        .a_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k in 5..7 crossed with n in 1..16 and m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x16 tile for k = 8, 12, ..., 40 (multiples of 4).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_div_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// Full 4x16 tile for k = 8, 12, ..., 40, with a_stride set to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_div_4_subtile) { 14844 TEST_REQUIRES_ARM_NEON_DOT; 14845 for (size_t k = 8; k <= 40; k += 4) { 14846 for (uint32_t n = 1; n <= 16; n++) { 14847 for (uint32_t m = 1; m <= 4; m++) { 14848 GemmMicrokernelTester() 14849 .mr(4) 14850 .nr(16) 14851 .kr(4) 14852 .sr(1) 14853 .m(m) 14854 .n(n) 14855 .k(k) 14856 .iterations(1) 14857 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14858 } 14859 } 14860 } 14861 } 14862 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_gt_16)14863 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16) { 14864 TEST_REQUIRES_ARM_NEON_DOT; 14865 for (uint32_t n = 17; n < 32; n++) { 14866 for (size_t k = 1; k <= 20; k += 5) { 14867 GemmMicrokernelTester() 14868 .mr(4) 14869 .nr(16) 14870 .kr(4) 14871 .sr(1) 14872 .m(4) 14873 .n(n) 14874 .k(k) 14875 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14876 } 14877 } 14878 } 14879 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_gt_16_strided_cn)14880 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_cn) { 14881 TEST_REQUIRES_ARM_NEON_DOT; 14882 for (uint32_t n = 17; n < 32; n++) { 14883 for (size_t k = 1; k <= 20; k += 5) { 14884 GemmMicrokernelTester() 14885 .mr(4) 14886 .nr(16) 14887 .kr(4) 14888 .sr(1) 14889 .m(4) 14890 .n(n) 14891 .k(k) 14892 .cn_stride(19) 14893 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14894 } 14895 } 14896 } 14897 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_gt_16_strided_a)14898 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_a) { 14899 TEST_REQUIRES_ARM_NEON_DOT; 14900 for (uint32_t n = 17; n < 32; n++) { 14901 for (size_t k = 1; k <= 20; k += 5) { 
14902 GemmMicrokernelTester() 14903 .mr(4) 14904 .nr(16) 14905 .kr(4) 14906 .sr(1) 14907 .m(4) 14908 .n(n) 14909 .k(k) 14910 .a_stride(23) 14911 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14912 } 14913 } 14914 } 14915 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_gt_16_subtile)14916 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_subtile) { 14917 TEST_REQUIRES_ARM_NEON_DOT; 14918 for (uint32_t n = 17; n < 32; n++) { 14919 for (size_t k = 1; k <= 20; k += 5) { 14920 for (uint32_t m = 1; m <= 4; m++) { 14921 GemmMicrokernelTester() 14922 .mr(4) 14923 .nr(16) 14924 .kr(4) 14925 .sr(1) 14926 .m(m) 14927 .n(n) 14928 .k(k) 14929 .iterations(1) 14930 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14931 } 14932 } 14933 } 14934 } 14935 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_div_16)14936 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_div_16) { 14937 TEST_REQUIRES_ARM_NEON_DOT; 14938 for (uint32_t n = 32; n <= 48; n += 16) { 14939 for (size_t k = 1; k <= 20; k += 5) { 14940 GemmMicrokernelTester() 14941 .mr(4) 14942 .nr(16) 14943 .kr(4) 14944 .sr(1) 14945 .m(4) 14946 .n(n) 14947 .k(k) 14948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14949 } 14950 } 14951 } 14952 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_div_16_strided_cn)14953 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_cn) { 14954 TEST_REQUIRES_ARM_NEON_DOT; 14955 for (uint32_t n = 32; n <= 48; n += 16) { 14956 for (size_t k = 1; k <= 20; k += 5) { 14957 GemmMicrokernelTester() 14958 .mr(4) 14959 .nr(16) 14960 .kr(4) 14961 .sr(1) 14962 .m(4) 14963 .n(n) 14964 .k(k) 14965 .cn_stride(19) 14966 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14967 } 14968 } 14969 } 14970 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_div_16_strided_a)14971 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_a) { 14972 TEST_REQUIRES_ARM_NEON_DOT; 14973 for (uint32_t n = 32; n <= 48; n += 16) { 14974 for (size_t k = 1; k <= 20; k += 5) { 14975 GemmMicrokernelTester() 14976 .mr(4) 14977 .nr(16) 14978 .kr(4) 14979 .sr(1) 14980 .m(4) 14981 .n(n) 14982 .k(k) 14983 .a_stride(23) 14984 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14985 } 14986 } 14987 } 14988 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_div_16_subtile)14989 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_subtile) { 14990 TEST_REQUIRES_ARM_NEON_DOT; 14991 for (uint32_t n = 32; n <= 48; n += 16) { 14992 for (size_t k = 1; k <= 20; k += 5) { 14993 for (uint32_t m = 1; m <= 4; m++) { 14994 GemmMicrokernelTester() 14995 .mr(4) 14996 .nr(16) 14997 .kr(4) 14998 .sr(1) 14999 .m(m) 15000 .n(n) 15001 .k(k) 15002 .iterations(1) 15003 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15004 } 15005 } 15006 } 15007 } 15008 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,strided_cm_subtile)15009 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, strided_cm_subtile) { 15010 TEST_REQUIRES_ARM_NEON_DOT; 15011 for (size_t k = 1; k <= 20; k += 5) { 15012 for (uint32_t n = 1; n <= 16; n++) { 15013 for (uint32_t m = 1; m <= 4; m++) { 15014 GemmMicrokernelTester() 15015 .mr(4) 15016 .nr(16) 15017 .kr(4) 15018 .sr(1) 15019 .m(m) 15020 .n(n) 15021 .k(k) 15022 .cm_stride(19) 15023 .iterations(1) 15024 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15025 } 15026 } 15027 } 15028 } 15029 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,qmin)15030 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, qmin) { 15031 TEST_REQUIRES_ARM_NEON_DOT; 15032 GemmMicrokernelTester() 15033 .mr(4) 15034 .nr(16) 15035 .kr(4) 15036 .sr(1) 15037 .m(4) 15038 .n(16) 15039 .k(4) 15040 .qmin(128) 15041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15042 } 15043 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,qmax)15044 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, qmax) { 15045 TEST_REQUIRES_ARM_NEON_DOT; 15046 GemmMicrokernelTester() 15047 .mr(4) 15048 .nr(16) 15049 .kr(4) 15050 .sr(1) 15051 .m(4) 15052 .n(16) 15053 .k(4) 15054 .qmax(128) 15055 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15056 } 15057 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,strided_cm)15058 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, strided_cm) { 15059 TEST_REQUIRES_ARM_NEON_DOT; 15060 GemmMicrokernelTester() 15061 .mr(4) 15062 .nr(16) 15063 .kr(4) 15064 .sr(1) 15065 .m(4) 15066 .n(16) 15067 .k(4) 15068 .cm_stride(19) 15069 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15070 } 15071 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 15072 15073 15074 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_eq_16)15075 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16) { 15076 TEST_REQUIRES_ARM_NEON; 15077 GemmMicrokernelTester() 15078 .mr(4) 15079 .nr(8) 15080 .kr(8) 15081 .sr(1) 15082 .m(4) 15083 .n(8) 15084 .k(16) 15085 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 15086 } 15087 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,strided_cn)15088 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cn) { 15089 TEST_REQUIRES_ARM_NEON; 15090 GemmMicrokernelTester() 15091 .mr(4) 15092 .nr(8) 15093 .kr(8) 15094 .sr(1) 15095 .m(4) 15096 .n(8) 15097 .k(16) 15098 .cn_stride(11) 15099 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15100 } 15101 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_eq_16_strided_a)15102 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_strided_a) { 15103 TEST_REQUIRES_ARM_NEON; 15104 GemmMicrokernelTester() 15105 .mr(4) 15106 .nr(8) 15107 .kr(8) 15108 .sr(1) 15109 .m(4) 15110 .n(8) 15111 .k(16) 15112 .a_stride(19) 15113 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15114 } 15115 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_eq_16_subtile)15116 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile) { 15117 TEST_REQUIRES_ARM_NEON; 15118 for (uint32_t n = 1; n <= 8; n++) { 15119 for (uint32_t m = 1; m <= 4; m++) { 15120 GemmMicrokernelTester() 15121 .mr(4) 15122 .nr(8) 15123 .kr(8) 15124 .sr(1) 15125 .m(m) 15126 .n(n) 15127 .k(16) 15128 .iterations(1) 15129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15130 } 15131 } 15132 } 15133 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_eq_16_subtile_m)15134 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_m) { 15135 TEST_REQUIRES_ARM_NEON; 15136 for (uint32_t m = 1; m <= 4; m++) { 15137 GemmMicrokernelTester() 15138 .mr(4) 15139 .nr(8) 15140 .kr(8) 15141 .sr(1) 15142 .m(m) 15143 .n(8) 15144 .k(16) 15145 .iterations(1) 15146 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15147 } 15148 } 15149 
// QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL cases. Every test in this span drives
// xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal through GemmMicrokernelTester
// configured with mr=4, nr=8, kr=8, sr=1, and is gated by TEST_REQUIRES_ARM_NEON.

// k = 16, m = 4; n sweeps 1..8 with iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(4)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k sweeps 1..15 on the full m = 4, n = 8 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k sweeps 1..15 with a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k sweeps 1..15 for every (m, n) with m in 1..4 and n in 1..8, iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k sweeps 17..31 on the full m = 4, n = 8 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k sweeps 17..31 with a_stride(37).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k sweeps 17..31 for every (m, n) with m in 1..4 and n in 1..8, iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k sweeps 32..160 in steps of 16 on the full m = 4, n = 8 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k sweeps 32..160 in steps of 16 with a_stride(163).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(163)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k sweeps 32..160 in steps of 16 for every (m, n) with m in 1..4 and n in 1..8, iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n sweeps 9..15 and k sweeps 1..80 in steps of 17, with m = 4.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_8, with cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_8, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_8, additionally sweeping m over 1..4 with iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n takes 16 and 24; k sweeps 1..80 in steps of 17, with m = 4.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_div_8_strided_cn)15409 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_strided_cn) { 15410 TEST_REQUIRES_ARM_NEON; 15411 for (uint32_t n = 16; n <= 24; n += 8) { 15412 for (size_t k = 1; k <= 80; k += 17) { 15413 GemmMicrokernelTester() 15414 .mr(4) 15415 .nr(8) 15416 .kr(8) 15417 .sr(1) 15418 .m(4) 15419 .n(n) 15420 .k(k) 15421 .cn_stride(11) 15422 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15423 } 15424 } 15425 } 15426 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_div_8_strided_a)15427 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_strided_a) { 15428 TEST_REQUIRES_ARM_NEON; 15429 for (uint32_t n = 16; n <= 24; n += 8) { 15430 for (size_t k = 1; k <= 80; k += 17) { 15431 GemmMicrokernelTester() 15432 .mr(4) 15433 .nr(8) 15434 .kr(8) 15435 .sr(1) 15436 .m(4) 15437 .n(n) 15438 .k(k) 15439 .a_stride(83) 15440 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15441 } 15442 } 15443 } 15444 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_div_8_subtile)15445 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_subtile) { 15446 TEST_REQUIRES_ARM_NEON; 15447 for (uint32_t n = 16; n <= 24; n += 8) { 15448 for (size_t k = 1; k <= 80; k += 17) { 15449 for (uint32_t m = 1; m <= 4; m++) { 15450 GemmMicrokernelTester() 15451 .mr(4) 15452 .nr(8) 15453 .kr(8) 15454 .sr(1) 15455 .m(m) 15456 .n(n) 15457 .k(k) 15458 .iterations(1) 15459 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15460 } 15461 } 15462 } 15463 } 15464 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,strided_cm_subtile)15465 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm_subtile) { 15466 TEST_REQUIRES_ARM_NEON; 15467 for (size_t k = 1; k <= 80; k += 17) { 15468 for (uint32_t n = 1; n <= 8; n++) { 15469 for (uint32_t 
m = 1; m <= 4; m++) { 15470 GemmMicrokernelTester() 15471 .mr(4) 15472 .nr(8) 15473 .kr(8) 15474 .sr(1) 15475 .m(m) 15476 .n(n) 15477 .k(k) 15478 .cm_stride(11) 15479 .iterations(1) 15480 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15481 } 15482 } 15483 } 15484 } 15485 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,qmin)15486 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmin) { 15487 TEST_REQUIRES_ARM_NEON; 15488 GemmMicrokernelTester() 15489 .mr(4) 15490 .nr(8) 15491 .kr(8) 15492 .sr(1) 15493 .m(4) 15494 .n(8) 15495 .k(16) 15496 .qmin(128) 15497 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15498 } 15499 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,qmax)15500 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmax) { 15501 TEST_REQUIRES_ARM_NEON; 15502 GemmMicrokernelTester() 15503 .mr(4) 15504 .nr(8) 15505 .kr(8) 15506 .sr(1) 15507 .m(4) 15508 .n(8) 15509 .k(16) 15510 .qmax(128) 15511 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15512 } 15513 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,strided_cm)15514 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm) { 15515 TEST_REQUIRES_ARM_NEON; 15516 GemmMicrokernelTester() 15517 .mr(4) 15518 .nr(8) 15519 .kr(8) 15520 .sr(1) 15521 .m(4) 15522 .n(8) 15523 .k(16) 15524 .cm_stride(11) 15525 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15526 } 15527 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 15528 15529 15530 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16)15531 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16) { 15532 TEST_REQUIRES_ARM_NEON; 15533 GemmMicrokernelTester() 15534 .mr(2) 15535 .nr(16) 15536 .kr(16) 15537 .sr(1) 15538 .m(2) 15539 .n(16) 15540 
.k(16) 15541 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15542 } 15543 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,strided_cn)15544 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cn) { 15545 TEST_REQUIRES_ARM_NEON; 15546 GemmMicrokernelTester() 15547 .mr(2) 15548 .nr(16) 15549 .kr(16) 15550 .sr(1) 15551 .m(2) 15552 .n(16) 15553 .k(16) 15554 .cn_stride(19) 15555 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15556 } 15557 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_strided_a)15558 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_strided_a) { 15559 TEST_REQUIRES_ARM_NEON; 15560 GemmMicrokernelTester() 15561 .mr(2) 15562 .nr(16) 15563 .kr(16) 15564 .sr(1) 15565 .m(2) 15566 .n(16) 15567 .k(16) 15568 .a_stride(19) 15569 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15570 } 15571 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_subtile)15572 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile) { 15573 TEST_REQUIRES_ARM_NEON; 15574 for (uint32_t n = 1; n <= 16; n++) { 15575 for (uint32_t m = 1; m <= 2; m++) { 15576 GemmMicrokernelTester() 15577 .mr(2) 15578 .nr(16) 15579 .kr(16) 15580 .sr(1) 15581 .m(m) 15582 .n(n) 15583 .k(16) 15584 .iterations(1) 15585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15586 } 15587 } 15588 } 15589 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_subtile_m)15590 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile_m) { 15591 TEST_REQUIRES_ARM_NEON; 15592 for (uint32_t m = 1; m <= 2; m++) { 15593 GemmMicrokernelTester() 15594 .mr(2) 15595 .nr(16) 15596 .kr(16) 15597 .sr(1) 15598 .m(m) 15599 .n(16) 15600 .k(16) 15601 .iterations(1) 15602 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15603 } 15604 } 15605 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_subtile_n)15606 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile_n) { 15607 TEST_REQUIRES_ARM_NEON; 15608 for (uint32_t n = 1; n <= 16; n++) { 15609 GemmMicrokernelTester() 15610 .mr(2) 15611 .nr(16) 15612 .kr(16) 15613 .sr(1) 15614 .m(2) 15615 .n(n) 15616 .k(16) 15617 .iterations(1) 15618 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15619 } 15620 } 15621 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_lt_16)15622 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_lt_16) { 15623 TEST_REQUIRES_ARM_NEON; 15624 for (size_t k = 1; k < 16; k++) { 15625 GemmMicrokernelTester() 15626 .mr(2) 15627 .nr(16) 15628 .kr(16) 15629 .sr(1) 15630 .m(2) 15631 .n(16) 15632 .k(k) 15633 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15634 } 15635 } 15636 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_lt_16_strided_a)15637 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_lt_16_strided_a) { 15638 TEST_REQUIRES_ARM_NEON; 15639 for (size_t k = 1; k < 16; k++) { 15640 GemmMicrokernelTester() 15641 .mr(2) 15642 .nr(16) 15643 .kr(16) 15644 .sr(1) 15645 .m(2) 15646 .n(16) 15647 .k(k) 15648 .a_stride(19) 15649 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15650 } 15651 } 15652 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_lt_16_subtile)15653 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_lt_16_subtile) { 15654 TEST_REQUIRES_ARM_NEON; 15655 for (size_t k = 1; k < 16; k++) { 15656 for (uint32_t n = 1; n <= 16; n++) { 15657 for (uint32_t m = 1; m <= 2; m++) { 15658 GemmMicrokernelTester() 15659 .mr(2) 15660 .nr(16) 15661 
.kr(16) 15662 .sr(1) 15663 .m(m) 15664 .n(n) 15665 .k(k) 15666 .iterations(1) 15667 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15668 } 15669 } 15670 } 15671 } 15672 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_gt_16)15673 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_gt_16) { 15674 TEST_REQUIRES_ARM_NEON; 15675 for (size_t k = 17; k < 32; k++) { 15676 GemmMicrokernelTester() 15677 .mr(2) 15678 .nr(16) 15679 .kr(16) 15680 .sr(1) 15681 .m(2) 15682 .n(16) 15683 .k(k) 15684 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15685 } 15686 } 15687 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_gt_16_strided_a)15688 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_gt_16_strided_a) { 15689 TEST_REQUIRES_ARM_NEON; 15690 for (size_t k = 17; k < 32; k++) { 15691 GemmMicrokernelTester() 15692 .mr(2) 15693 .nr(16) 15694 .kr(16) 15695 .sr(1) 15696 .m(2) 15697 .n(16) 15698 .k(k) 15699 .a_stride(37) 15700 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15701 } 15702 } 15703 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_gt_16_subtile)15704 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_gt_16_subtile) { 15705 TEST_REQUIRES_ARM_NEON; 15706 for (size_t k = 17; k < 32; k++) { 15707 for (uint32_t n = 1; n <= 16; n++) { 15708 for (uint32_t m = 1; m <= 2; m++) { 15709 GemmMicrokernelTester() 15710 .mr(2) 15711 .nr(16) 15712 .kr(16) 15713 .sr(1) 15714 .m(m) 15715 .n(n) 15716 .k(k) 15717 .iterations(1) 15718 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15719 } 15720 } 15721 } 15722 } 15723 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_div_16)15724 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_div_16) { 15725 TEST_REQUIRES_ARM_NEON; 15726 for 
(size_t k = 32; k <= 160; k += 16) { 15727 GemmMicrokernelTester() 15728 .mr(2) 15729 .nr(16) 15730 .kr(16) 15731 .sr(1) 15732 .m(2) 15733 .n(16) 15734 .k(k) 15735 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15736 } 15737 } 15738 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_div_16_strided_a)15739 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_div_16_strided_a) { 15740 TEST_REQUIRES_ARM_NEON; 15741 for (size_t k = 32; k <= 160; k += 16) { 15742 GemmMicrokernelTester() 15743 .mr(2) 15744 .nr(16) 15745 .kr(16) 15746 .sr(1) 15747 .m(2) 15748 .n(16) 15749 .k(k) 15750 .a_stride(163) 15751 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15752 } 15753 } 15754 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_div_16_subtile)15755 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_div_16_subtile) { 15756 TEST_REQUIRES_ARM_NEON; 15757 for (size_t k = 32; k <= 160; k += 16) { 15758 for (uint32_t n = 1; n <= 16; n++) { 15759 for (uint32_t m = 1; m <= 2; m++) { 15760 GemmMicrokernelTester() 15761 .mr(2) 15762 .nr(16) 15763 .kr(16) 15764 .sr(1) 15765 .m(m) 15766 .n(n) 15767 .k(k) 15768 .iterations(1) 15769 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15770 } 15771 } 15772 } 15773 } 15774 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16)15775 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16) { 15776 TEST_REQUIRES_ARM_NEON; 15777 for (uint32_t n = 17; n < 32; n++) { 15778 for (size_t k = 1; k <= 80; k += 17) { 15779 GemmMicrokernelTester() 15780 .mr(2) 15781 .nr(16) 15782 .kr(16) 15783 .sr(1) 15784 .m(2) 15785 .n(n) 15786 .k(k) 15787 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15788 } 15789 } 15790 } 15791 
TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16_strided_cn)15792 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_strided_cn) { 15793 TEST_REQUIRES_ARM_NEON; 15794 for (uint32_t n = 17; n < 32; n++) { 15795 for (size_t k = 1; k <= 80; k += 17) { 15796 GemmMicrokernelTester() 15797 .mr(2) 15798 .nr(16) 15799 .kr(16) 15800 .sr(1) 15801 .m(2) 15802 .n(n) 15803 .k(k) 15804 .cn_stride(19) 15805 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15806 } 15807 } 15808 } 15809 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16_strided_a)15810 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_strided_a) { 15811 TEST_REQUIRES_ARM_NEON; 15812 for (uint32_t n = 17; n < 32; n++) { 15813 for (size_t k = 1; k <= 80; k += 17) { 15814 GemmMicrokernelTester() 15815 .mr(2) 15816 .nr(16) 15817 .kr(16) 15818 .sr(1) 15819 .m(2) 15820 .n(n) 15821 .k(k) 15822 .a_stride(83) 15823 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15824 } 15825 } 15826 } 15827 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16_subtile)15828 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_subtile) { 15829 TEST_REQUIRES_ARM_NEON; 15830 for (uint32_t n = 17; n < 32; n++) { 15831 for (size_t k = 1; k <= 80; k += 17) { 15832 for (uint32_t m = 1; m <= 2; m++) { 15833 GemmMicrokernelTester() 15834 .mr(2) 15835 .nr(16) 15836 .kr(16) 15837 .sr(1) 15838 .m(m) 15839 .n(n) 15840 .k(k) 15841 .iterations(1) 15842 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15843 } 15844 } 15845 } 15846 } 15847 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16)15848 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16) { 15849 TEST_REQUIRES_ARM_NEON; 15850 for (uint32_t n = 32; n <= 48; n += 16) { 15851 for (size_t k = 1; k <= 80; k += 17) { 15852 
GemmMicrokernelTester() 15853 .mr(2) 15854 .nr(16) 15855 .kr(16) 15856 .sr(1) 15857 .m(2) 15858 .n(n) 15859 .k(k) 15860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15861 } 15862 } 15863 } 15864 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16_strided_cn)15865 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_strided_cn) { 15866 TEST_REQUIRES_ARM_NEON; 15867 for (uint32_t n = 32; n <= 48; n += 16) { 15868 for (size_t k = 1; k <= 80; k += 17) { 15869 GemmMicrokernelTester() 15870 .mr(2) 15871 .nr(16) 15872 .kr(16) 15873 .sr(1) 15874 .m(2) 15875 .n(n) 15876 .k(k) 15877 .cn_stride(19) 15878 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15879 } 15880 } 15881 } 15882 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16_strided_a)15883 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_strided_a) { 15884 TEST_REQUIRES_ARM_NEON; 15885 for (uint32_t n = 32; n <= 48; n += 16) { 15886 for (size_t k = 1; k <= 80; k += 17) { 15887 GemmMicrokernelTester() 15888 .mr(2) 15889 .nr(16) 15890 .kr(16) 15891 .sr(1) 15892 .m(2) 15893 .n(n) 15894 .k(k) 15895 .a_stride(83) 15896 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15897 } 15898 } 15899 } 15900 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16_subtile)15901 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_subtile) { 15902 TEST_REQUIRES_ARM_NEON; 15903 for (uint32_t n = 32; n <= 48; n += 16) { 15904 for (size_t k = 1; k <= 80; k += 17) { 15905 for (uint32_t m = 1; m <= 2; m++) { 15906 GemmMicrokernelTester() 15907 .mr(2) 15908 .nr(16) 15909 .kr(16) 15910 .sr(1) 15911 .m(m) 15912 .n(n) 15913 .k(k) 15914 .iterations(1) 15915 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 15916 } 15917 } 15918 } 15919 } 15920 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,strided_cm_subtile)15921 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cm_subtile) { 15922 TEST_REQUIRES_ARM_NEON; 15923 for (size_t k = 1; k <= 80; k += 17) { 15924 for (uint32_t n = 1; n <= 16; n++) { 15925 for (uint32_t m = 1; m <= 2; m++) { 15926 GemmMicrokernelTester() 15927 .mr(2) 15928 .nr(16) 15929 .kr(16) 15930 .sr(1) 15931 .m(m) 15932 .n(n) 15933 .k(k) 15934 .cm_stride(19) 15935 .iterations(1) 15936 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15937 } 15938 } 15939 } 15940 } 15941 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,qmin)15942 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, qmin) { 15943 TEST_REQUIRES_ARM_NEON; 15944 GemmMicrokernelTester() 15945 .mr(2) 15946 .nr(16) 15947 .kr(16) 15948 .sr(1) 15949 .m(2) 15950 .n(16) 15951 .k(16) 15952 .qmin(128) 15953 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15954 } 15955 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,qmax)15956 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, qmax) { 15957 TEST_REQUIRES_ARM_NEON; 15958 GemmMicrokernelTester() 15959 .mr(2) 15960 .nr(16) 15961 .kr(16) 15962 .sr(1) 15963 .m(2) 15964 .n(16) 15965 .k(16) 15966 .qmax(128) 15967 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15968 } 15969 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,strided_cm)15970 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cm) { 15971 TEST_REQUIRES_ARM_NEON; 15972 GemmMicrokernelTester() 15973 .mr(2) 15974 .nr(16) 15975 .kr(16) 15976 .sr(1) 15977 .m(2) 15978 .n(16) 15979 .k(16) 15980 .cm_stride(19) 15981 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
15982 } 15983 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 15984 15985 15986 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8)15987 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8) { 15988 TEST_REQUIRES_ARM_NEON; 15989 GemmMicrokernelTester() 15990 .mr(2) 15991 .nr(16) 15992 .kr(1) 15993 .sr(1) 15994 .m(2) 15995 .n(16) 15996 .k(8) 15997 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15998 } 15999 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cn)16000 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cn) { 16001 TEST_REQUIRES_ARM_NEON; 16002 GemmMicrokernelTester() 16003 .mr(2) 16004 .nr(16) 16005 .kr(1) 16006 .sr(1) 16007 .m(2) 16008 .n(16) 16009 .k(8) 16010 .cn_stride(19) 16011 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16012 } 16013 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_strided_a)16014 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_strided_a) { 16015 TEST_REQUIRES_ARM_NEON; 16016 GemmMicrokernelTester() 16017 .mr(2) 16018 .nr(16) 16019 .kr(1) 16020 .sr(1) 16021 .m(2) 16022 .n(16) 16023 .k(8) 16024 .a_stride(11) 16025 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16026 } 16027 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_subtile)16028 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile) { 16029 TEST_REQUIRES_ARM_NEON; 16030 for (uint32_t n = 1; n <= 16; n++) { 16031 for (uint32_t m = 1; m <= 2; m++) { 16032 GemmMicrokernelTester() 16033 .mr(2) 16034 .nr(16) 16035 .kr(1) 16036 .sr(1) 16037 .m(m) 16038 .n(n) 16039 .k(8) 16040 .iterations(1) 16041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16042 } 16043 } 16044 } 
16045 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_subtile_m)16046 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 16047 TEST_REQUIRES_ARM_NEON; 16048 for (uint32_t m = 1; m <= 2; m++) { 16049 GemmMicrokernelTester() 16050 .mr(2) 16051 .nr(16) 16052 .kr(1) 16053 .sr(1) 16054 .m(m) 16055 .n(16) 16056 .k(8) 16057 .iterations(1) 16058 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16059 } 16060 } 16061 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_subtile_n)16062 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 16063 TEST_REQUIRES_ARM_NEON; 16064 for (uint32_t n = 1; n <= 16; n++) { 16065 GemmMicrokernelTester() 16066 .mr(2) 16067 .nr(16) 16068 .kr(1) 16069 .sr(1) 16070 .m(2) 16071 .n(n) 16072 .k(8) 16073 .iterations(1) 16074 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16075 } 16076 } 16077 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_lt_8)16078 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8) { 16079 TEST_REQUIRES_ARM_NEON; 16080 for (size_t k = 1; k < 8; k++) { 16081 GemmMicrokernelTester() 16082 .mr(2) 16083 .nr(16) 16084 .kr(1) 16085 .sr(1) 16086 .m(2) 16087 .n(16) 16088 .k(k) 16089 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16090 } 16091 } 16092 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_lt_8_strided_a)16093 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8_strided_a) { 16094 TEST_REQUIRES_ARM_NEON; 16095 for (size_t k = 1; k < 8; k++) { 16096 GemmMicrokernelTester() 16097 .mr(2) 16098 .nr(16) 16099 .kr(1) 16100 .sr(1) 16101 .m(2) 16102 .n(16) 16103 .k(k) 16104 .a_stride(11) 16105 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16106 } 
16107 } 16108 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_lt_8_subtile)16109 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8_subtile) { 16110 TEST_REQUIRES_ARM_NEON; 16111 for (size_t k = 1; k < 8; k++) { 16112 for (uint32_t n = 1; n <= 16; n++) { 16113 for (uint32_t m = 1; m <= 2; m++) { 16114 GemmMicrokernelTester() 16115 .mr(2) 16116 .nr(16) 16117 .kr(1) 16118 .sr(1) 16119 .m(m) 16120 .n(n) 16121 .k(k) 16122 .iterations(1) 16123 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16124 } 16125 } 16126 } 16127 } 16128 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_gt_8)16129 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8) { 16130 TEST_REQUIRES_ARM_NEON; 16131 for (size_t k = 9; k < 16; k++) { 16132 GemmMicrokernelTester() 16133 .mr(2) 16134 .nr(16) 16135 .kr(1) 16136 .sr(1) 16137 .m(2) 16138 .n(16) 16139 .k(k) 16140 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16141 } 16142 } 16143 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_gt_8_strided_a)16144 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8_strided_a) { 16145 TEST_REQUIRES_ARM_NEON; 16146 for (size_t k = 9; k < 16; k++) { 16147 GemmMicrokernelTester() 16148 .mr(2) 16149 .nr(16) 16150 .kr(1) 16151 .sr(1) 16152 .m(2) 16153 .n(16) 16154 .k(k) 16155 .a_stride(19) 16156 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16157 } 16158 } 16159 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_gt_8_subtile)16160 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8_subtile) { 16161 TEST_REQUIRES_ARM_NEON; 16162 for (size_t k = 9; k < 16; k++) { 16163 for (uint32_t n = 1; n <= 16; n++) { 16164 for (uint32_t m = 1; m <= 2; m++) { 16165 GemmMicrokernelTester() 16166 .mr(2) 16167 .nr(16) 16168 .kr(1) 16169 .sr(1) 16170 .m(m) 16171 
.n(n) 16172 .k(k) 16173 .iterations(1) 16174 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16175 } 16176 } 16177 } 16178 } 16179 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_div_8)16180 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8) { 16181 TEST_REQUIRES_ARM_NEON; 16182 for (size_t k = 16; k <= 80; k += 8) { 16183 GemmMicrokernelTester() 16184 .mr(2) 16185 .nr(16) 16186 .kr(1) 16187 .sr(1) 16188 .m(2) 16189 .n(16) 16190 .k(k) 16191 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16192 } 16193 } 16194 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_div_8_strided_a)16195 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8_strided_a) { 16196 TEST_REQUIRES_ARM_NEON; 16197 for (size_t k = 16; k <= 80; k += 8) { 16198 GemmMicrokernelTester() 16199 .mr(2) 16200 .nr(16) 16201 .kr(1) 16202 .sr(1) 16203 .m(2) 16204 .n(16) 16205 .k(k) 16206 .a_stride(83) 16207 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16208 } 16209 } 16210 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_div_8_subtile)16211 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8_subtile) { 16212 TEST_REQUIRES_ARM_NEON; 16213 for (size_t k = 16; k <= 80; k += 8) { 16214 for (uint32_t n = 1; n <= 16; n++) { 16215 for (uint32_t m = 1; m <= 2; m++) { 16216 GemmMicrokernelTester() 16217 .mr(2) 16218 .nr(16) 16219 .kr(1) 16220 .sr(1) 16221 .m(m) 16222 .n(n) 16223 .k(k) 16224 .iterations(1) 16225 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16226 } 16227 } 16228 } 16229 } 16230 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16)16231 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16) { 16232 TEST_REQUIRES_ARM_NEON; 16233 for (uint32_t 
n = 17; n < 32; n++) { 16234 for (size_t k = 1; k <= 40; k += 9) { 16235 GemmMicrokernelTester() 16236 .mr(2) 16237 .nr(16) 16238 .kr(1) 16239 .sr(1) 16240 .m(2) 16241 .n(n) 16242 .k(k) 16243 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16244 } 16245 } 16246 } 16247 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16_strided_cn)16248 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) { 16249 TEST_REQUIRES_ARM_NEON; 16250 for (uint32_t n = 17; n < 32; n++) { 16251 for (size_t k = 1; k <= 40; k += 9) { 16252 GemmMicrokernelTester() 16253 .mr(2) 16254 .nr(16) 16255 .kr(1) 16256 .sr(1) 16257 .m(2) 16258 .n(n) 16259 .k(k) 16260 .cn_stride(19) 16261 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16262 } 16263 } 16264 } 16265 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16_strided_a)16266 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_strided_a) { 16267 TEST_REQUIRES_ARM_NEON; 16268 for (uint32_t n = 17; n < 32; n++) { 16269 for (size_t k = 1; k <= 40; k += 9) { 16270 GemmMicrokernelTester() 16271 .mr(2) 16272 .nr(16) 16273 .kr(1) 16274 .sr(1) 16275 .m(2) 16276 .n(n) 16277 .k(k) 16278 .a_stride(43) 16279 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16280 } 16281 } 16282 } 16283 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16_subtile)16284 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_subtile) { 16285 TEST_REQUIRES_ARM_NEON; 16286 for (uint32_t n = 17; n < 32; n++) { 16287 for (size_t k = 1; k <= 40; k += 9) { 16288 for (uint32_t m = 1; m <= 2; m++) { 16289 GemmMicrokernelTester() 16290 .mr(2) 16291 .nr(16) 16292 .kr(1) 16293 .sr(1) 16294 .m(m) 16295 .n(n) 16296 .k(k) 16297 .iterations(1) 16298 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16299 } 16300 } 16301 } 16302 } 16303 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16)16304 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16) { 16305 TEST_REQUIRES_ARM_NEON; 16306 for (uint32_t n = 32; n <= 48; n += 16) { 16307 for (size_t k = 1; k <= 40; k += 9) { 16308 GemmMicrokernelTester() 16309 .mr(2) 16310 .nr(16) 16311 .kr(1) 16312 .sr(1) 16313 .m(2) 16314 .n(n) 16315 .k(k) 16316 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16317 } 16318 } 16319 } 16320 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16_strided_cn)16321 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) { 16322 TEST_REQUIRES_ARM_NEON; 16323 for (uint32_t n = 32; n <= 48; n += 16) { 16324 for (size_t k = 1; k <= 40; k += 9) { 16325 GemmMicrokernelTester() 16326 .mr(2) 16327 .nr(16) 16328 .kr(1) 16329 .sr(1) 16330 .m(2) 16331 .n(n) 16332 .k(k) 16333 .cn_stride(19) 16334 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16335 } 16336 } 16337 } 16338 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16_strided_a)16339 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_strided_a) { 16340 TEST_REQUIRES_ARM_NEON; 16341 for (uint32_t n = 32; n <= 48; n += 16) { 16342 for (size_t k = 1; k <= 40; k += 9) { 16343 GemmMicrokernelTester() 16344 .mr(2) 16345 .nr(16) 16346 .kr(1) 16347 .sr(1) 16348 .m(2) 16349 .n(n) 16350 .k(k) 16351 .a_stride(43) 16352 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16353 } 16354 } 16355 } 16356 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16_subtile)16357 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_subtile) { 16358 TEST_REQUIRES_ARM_NEON; 16359 for (uint32_t n = 32; n <= 
48; n += 16) { 16360 for (size_t k = 1; k <= 40; k += 9) { 16361 for (uint32_t m = 1; m <= 2; m++) { 16362 GemmMicrokernelTester() 16363 .mr(2) 16364 .nr(16) 16365 .kr(1) 16366 .sr(1) 16367 .m(m) 16368 .n(n) 16369 .k(k) 16370 .iterations(1) 16371 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16372 } 16373 } 16374 } 16375 } 16376 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cm_subtile)16377 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm_subtile) { 16378 TEST_REQUIRES_ARM_NEON; 16379 for (size_t k = 1; k <= 40; k += 9) { 16380 for (uint32_t n = 1; n <= 16; n++) { 16381 for (uint32_t m = 1; m <= 2; m++) { 16382 GemmMicrokernelTester() 16383 .mr(2) 16384 .nr(16) 16385 .kr(1) 16386 .sr(1) 16387 .m(m) 16388 .n(n) 16389 .k(k) 16390 .cm_stride(19) 16391 .iterations(1) 16392 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16393 } 16394 } 16395 } 16396 } 16397 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,qmin)16398 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmin) { 16399 TEST_REQUIRES_ARM_NEON; 16400 GemmMicrokernelTester() 16401 .mr(2) 16402 .nr(16) 16403 .kr(1) 16404 .sr(1) 16405 .m(2) 16406 .n(16) 16407 .k(8) 16408 .qmin(128) 16409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16410 } 16411 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,qmax)16412 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmax) { 16413 TEST_REQUIRES_ARM_NEON; 16414 GemmMicrokernelTester() 16415 .mr(2) 16416 .nr(16) 16417 .kr(1) 16418 .sr(1) 16419 .m(2) 16420 .n(16) 16421 .k(8) 16422 .qmax(128) 16423 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16424 } 16425 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cm)16426 
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm) { 16427 TEST_REQUIRES_ARM_NEON; 16428 GemmMicrokernelTester() 16429 .mr(2) 16430 .nr(16) 16431 .kr(1) 16432 .sr(1) 16433 .m(2) 16434 .n(16) 16435 .k(8) 16436 .cm_stride(19) 16437 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16438 } 16439 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 16440 16441 16442 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8)16443 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8) { 16444 TEST_REQUIRES_ARM_NEON; 16445 GemmMicrokernelTester() 16446 .mr(4) 16447 .nr(16) 16448 .kr(1) 16449 .sr(1) 16450 .m(4) 16451 .n(16) 16452 .k(8) 16453 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16454 } 16455 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,strided_cn)16456 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cn) { 16457 TEST_REQUIRES_ARM_NEON; 16458 GemmMicrokernelTester() 16459 .mr(4) 16460 .nr(16) 16461 .kr(1) 16462 .sr(1) 16463 .m(4) 16464 .n(16) 16465 .k(8) 16466 .cn_stride(19) 16467 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16468 } 16469 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_strided_a)16470 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_strided_a) { 16471 TEST_REQUIRES_ARM_NEON; 16472 GemmMicrokernelTester() 16473 .mr(4) 16474 .nr(16) 16475 .kr(1) 16476 .sr(1) 16477 .m(4) 16478 .n(16) 16479 .k(8) 16480 .a_stride(11) 16481 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16482 } 16483 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile)16484 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile) { 16485 TEST_REQUIRES_ARM_NEON; 16486 for 
(uint32_t n = 1; n <= 16; n++) { 16487 for (uint32_t m = 1; m <= 4; m++) { 16488 GemmMicrokernelTester() 16489 .mr(4) 16490 .nr(16) 16491 .kr(1) 16492 .sr(1) 16493 .m(m) 16494 .n(n) 16495 .k(8) 16496 .iterations(1) 16497 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16498 } 16499 } 16500 } 16501 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile_m)16502 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 16503 TEST_REQUIRES_ARM_NEON; 16504 for (uint32_t m = 1; m <= 4; m++) { 16505 GemmMicrokernelTester() 16506 .mr(4) 16507 .nr(16) 16508 .kr(1) 16509 .sr(1) 16510 .m(m) 16511 .n(16) 16512 .k(8) 16513 .iterations(1) 16514 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16515 } 16516 } 16517 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile_n)16518 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 16519 TEST_REQUIRES_ARM_NEON; 16520 for (uint32_t n = 1; n <= 16; n++) { 16521 GemmMicrokernelTester() 16522 .mr(4) 16523 .nr(16) 16524 .kr(1) 16525 .sr(1) 16526 .m(4) 16527 .n(n) 16528 .k(8) 16529 .iterations(1) 16530 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16531 } 16532 } 16533 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_lt_8)16534 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8) { 16535 TEST_REQUIRES_ARM_NEON; 16536 for (size_t k = 1; k < 8; k++) { 16537 GemmMicrokernelTester() 16538 .mr(4) 16539 .nr(16) 16540 .kr(1) 16541 .sr(1) 16542 .m(4) 16543 .n(16) 16544 .k(k) 16545 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16546 } 16547 } 16548 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_lt_8_strided_a)16549 
// k swept over 1..7 on the full 4x16 tile with a_stride = 11.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(cur_k)
      .a_stride(11)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in 1..7, n in 1..16, m in 1..4; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k swept over 9..15 on the full 4x16 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(cur_k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k swept over 9..15 on the full 4x16 tile with a_stride = 19.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(cur_k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in 9..15, n in 1..16, m in 1..4; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k swept over 16..80 in steps of 8 on the full 4x16 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(cur_k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k swept over 16..80 in steps of 8 with a_stride = 83.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(cur_k)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in 16..80 step 8, n in 1..16, m in 1..4; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in 17..31, k in 1..40 step 9, m = 4.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 17..31, k in 1..40 step 9, m = 4, cn_stride = 19.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 17..31, k in 1..40 step 9, m = 4, a_stride = 43.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 17..31, k in 1..40 step 9, m in 1..4; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in {32, 48}, k in 1..40 step 9, m = 4.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {32, 48}, k in 1..40 step 9, m = 4, cn_stride = 19.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {32, 48}, k in 1..40 step 9, m = 4, a_stride = 43.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {32, 48}, k in 1..40 step 9, m in 1..4; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k in 1..40 step 9, n in 1..16, m in 1..4, cm_stride = 19; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x16 tile, k = 8, qmin set to 128.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,qmax)16868 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmax) { 16869 TEST_REQUIRES_ARM_NEON; 16870 GemmMicrokernelTester() 16871 .mr(4) 16872 .nr(16) 16873 .kr(1) 16874 .sr(1) 16875 .m(4) 16876 .n(16) 16877 .k(8) 16878 .qmax(128) 16879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16880 } 16881 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,strided_cm)16882 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm) { 16883 TEST_REQUIRES_ARM_NEON; 16884 GemmMicrokernelTester() 16885 .mr(4) 16886 .nr(16) 16887 .kr(1) 16888 .sr(1) 16889 .m(4) 16890 .n(16) 16891 .k(8) 16892 .cm_stride(19) 16893 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16894 } 16895 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 16896 16897 16898 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8)16899 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) { 16900 TEST_REQUIRES_ARM_NEON; 16901 GemmMicrokernelTester() 16902 .mr(6) 16903 .nr(8) 16904 .kr(1) 16905 .sr(1) 16906 .m(6) 16907 .n(8) 16908 .k(8) 16909 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16910 } 16911 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,strided_cn)16912 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cn) { 16913 TEST_REQUIRES_ARM_NEON; 16914 GemmMicrokernelTester() 16915 .mr(6) 16916 .nr(8) 16917 .kr(1) 16918 .sr(1) 16919 .m(6) 16920 .n(8) 16921 .k(8) 16922 .cn_stride(11) 16923 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16924 } 16925 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8_strided_a)16926 
// Full 6x8 tile, k = 8, a_stride = 11.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .a_stride(11)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// n in 1..8, m in 1..6, k = 8; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m swept over 1..6 with n = 8 and k = 8; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(cur_m).n(8).k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// n swept over 1..8 with m = 6 and k = 8; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(cur_n).k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k swept over 1..7 on the full 6x8 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k swept over 1..7 on the full 6x8 tile with a_stride = 11.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .a_stride(11)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in 1..7, n in 1..8, m in 1..6; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k swept over 9..15 on the full 6x8 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k swept over 9..15 on the full 6x8 tile with a_stride = 19.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in 9..15, n in 1..8, m in 1..6; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k swept over 16..80 in steps of 8 on the full 6x8 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k swept over 16..80 in steps of 8 with a_stride = 83.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(cur_k)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in 16..80 step 8, n in 1..8, m in 1..6; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in 9..15, k in 1..40 step 9, m = 6.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 9..15, k in 1..40 step 9, m = 6, cn_stride = 11.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 9..15, k in 1..40 step 9, m = 6, a_stride = 43.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 9..15, k in 1..40 step 9, m in 1..6; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 6; ++cur_m) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in {16, 24}, k in 1..40 step 9, m = 6.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cur_n).k(cur_k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n
= 16; n <= 24; n += 8) { 17236 for (size_t k = 1; k <= 40; k += 9) { 17237 GemmMicrokernelTester() 17238 .mr(6) 17239 .nr(8) 17240 .kr(1) 17241 .sr(1) 17242 .m(6) 17243 .n(n) 17244 .k(k) 17245 .cn_stride(11) 17246 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17247 } 17248 } 17249 } 17250 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,n_div_8_strided_a)17251 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) { 17252 TEST_REQUIRES_ARM_NEON; 17253 for (uint32_t n = 16; n <= 24; n += 8) { 17254 for (size_t k = 1; k <= 40; k += 9) { 17255 GemmMicrokernelTester() 17256 .mr(6) 17257 .nr(8) 17258 .kr(1) 17259 .sr(1) 17260 .m(6) 17261 .n(n) 17262 .k(k) 17263 .a_stride(43) 17264 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17265 } 17266 } 17267 } 17268 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,n_div_8_subtile)17269 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) { 17270 TEST_REQUIRES_ARM_NEON; 17271 for (uint32_t n = 16; n <= 24; n += 8) { 17272 for (size_t k = 1; k <= 40; k += 9) { 17273 for (uint32_t m = 1; m <= 6; m++) { 17274 GemmMicrokernelTester() 17275 .mr(6) 17276 .nr(8) 17277 .kr(1) 17278 .sr(1) 17279 .m(m) 17280 .n(n) 17281 .k(k) 17282 .iterations(1) 17283 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17284 } 17285 } 17286 } 17287 } 17288 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,strided_cm_subtile)17289 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) { 17290 TEST_REQUIRES_ARM_NEON; 17291 for (size_t k = 1; k <= 40; k += 9) { 17292 for (uint32_t n = 1; n <= 8; n++) { 17293 for (uint32_t m = 1; m <= 6; m++) { 17294 GemmMicrokernelTester() 17295 .mr(6) 17296 .nr(8) 17297 .kr(1) 17298 .sr(1) 17299 .m(m) 
17300 .n(n) 17301 .k(k) 17302 .cm_stride(11) 17303 .iterations(1) 17304 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17305 } 17306 } 17307 } 17308 } 17309 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,qmin)17310 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, qmin) { 17311 TEST_REQUIRES_ARM_NEON; 17312 GemmMicrokernelTester() 17313 .mr(6) 17314 .nr(8) 17315 .kr(1) 17316 .sr(1) 17317 .m(6) 17318 .n(8) 17319 .k(8) 17320 .qmin(128) 17321 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17322 } 17323 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,qmax)17324 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, qmax) { 17325 TEST_REQUIRES_ARM_NEON; 17326 GemmMicrokernelTester() 17327 .mr(6) 17328 .nr(8) 17329 .kr(1) 17330 .sr(1) 17331 .m(6) 17332 .n(8) 17333 .k(8) 17334 .qmax(128) 17335 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17336 } 17337 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,strided_cm)17338 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cm) { 17339 TEST_REQUIRES_ARM_NEON; 17340 GemmMicrokernelTester() 17341 .mr(6) 17342 .nr(8) 17343 .kr(1) 17344 .sr(1) 17345 .m(6) 17346 .n(8) 17347 .k(8) 17348 .cm_stride(11) 17349 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17350 } 17351 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 17352 17353 17354 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8)17355 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) { 17356 TEST_REQUIRES_ARM_NEON; 17357 GemmMicrokernelTester() 17358 .mr(2) 17359 .nr(16) 17360 .kr(1) 17361 .sr(1) 17362 .m(2) 17363 .n(16) 17364 .k(8) 17365 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17366 } 17367 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,strided_cn)17368 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cn) { 17369 TEST_REQUIRES_ARM_NEON; 17370 GemmMicrokernelTester() 17371 .mr(2) 17372 .nr(16) 17373 .kr(1) 17374 .sr(1) 17375 .m(2) 17376 .n(16) 17377 .k(8) 17378 .cn_stride(19) 17379 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17380 } 17381 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_strided_a)17382 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) { 17383 TEST_REQUIRES_ARM_NEON; 17384 GemmMicrokernelTester() 17385 .mr(2) 17386 .nr(16) 17387 .kr(1) 17388 .sr(1) 17389 .m(2) 17390 .n(16) 17391 .k(8) 17392 .a_stride(11) 17393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17394 } 17395 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile)17396 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) { 17397 TEST_REQUIRES_ARM_NEON; 17398 for (uint32_t n = 1; n <= 16; n++) { 17399 for (uint32_t m = 1; m <= 2; m++) { 17400 GemmMicrokernelTester() 17401 .mr(2) 17402 .nr(16) 17403 .kr(1) 17404 .sr(1) 17405 .m(m) 17406 .n(n) 17407 .k(8) 17408 .iterations(1) 17409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17410 } 17411 } 17412 } 17413 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_m)17414 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) { 17415 TEST_REQUIRES_ARM_NEON; 17416 for (uint32_t m = 1; m <= 2; m++) { 17417 GemmMicrokernelTester() 17418 .mr(2) 17419 .nr(16) 17420 .kr(1) 17421 .sr(1) 17422 .m(m) 17423 
.n(16) 17424 .k(8) 17425 .iterations(1) 17426 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17427 } 17428 } 17429 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_n)17430 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) { 17431 TEST_REQUIRES_ARM_NEON; 17432 for (uint32_t n = 1; n <= 16; n++) { 17433 GemmMicrokernelTester() 17434 .mr(2) 17435 .nr(16) 17436 .kr(1) 17437 .sr(1) 17438 .m(2) 17439 .n(n) 17440 .k(8) 17441 .iterations(1) 17442 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17443 } 17444 } 17445 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_lt_8)17446 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) { 17447 TEST_REQUIRES_ARM_NEON; 17448 for (size_t k = 1; k < 8; k++) { 17449 GemmMicrokernelTester() 17450 .mr(2) 17451 .nr(16) 17452 .kr(1) 17453 .sr(1) 17454 .m(2) 17455 .n(16) 17456 .k(k) 17457 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17458 } 17459 } 17460 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_lt_8_strided_a)17461 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) { 17462 TEST_REQUIRES_ARM_NEON; 17463 for (size_t k = 1; k < 8; k++) { 17464 GemmMicrokernelTester() 17465 .mr(2) 17466 .nr(16) 17467 .kr(1) 17468 .sr(1) 17469 .m(2) 17470 .n(16) 17471 .k(k) 17472 .a_stride(11) 17473 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17474 } 17475 } 17476 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_lt_8_subtile)17477 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) { 17478 TEST_REQUIRES_ARM_NEON; 17479 for (size_t k = 1; k < 8; k++) { 17480 for (uint32_t n = 1; n <= 16; 
n++) { 17481 for (uint32_t m = 1; m <= 2; m++) { 17482 GemmMicrokernelTester() 17483 .mr(2) 17484 .nr(16) 17485 .kr(1) 17486 .sr(1) 17487 .m(m) 17488 .n(n) 17489 .k(k) 17490 .iterations(1) 17491 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17492 } 17493 } 17494 } 17495 } 17496 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_gt_8)17497 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) { 17498 TEST_REQUIRES_ARM_NEON; 17499 for (size_t k = 9; k < 16; k++) { 17500 GemmMicrokernelTester() 17501 .mr(2) 17502 .nr(16) 17503 .kr(1) 17504 .sr(1) 17505 .m(2) 17506 .n(16) 17507 .k(k) 17508 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17509 } 17510 } 17511 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_gt_8_strided_a)17512 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) { 17513 TEST_REQUIRES_ARM_NEON; 17514 for (size_t k = 9; k < 16; k++) { 17515 GemmMicrokernelTester() 17516 .mr(2) 17517 .nr(16) 17518 .kr(1) 17519 .sr(1) 17520 .m(2) 17521 .n(16) 17522 .k(k) 17523 .a_stride(19) 17524 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17525 } 17526 } 17527 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_gt_8_subtile)17528 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) { 17529 TEST_REQUIRES_ARM_NEON; 17530 for (size_t k = 9; k < 16; k++) { 17531 for (uint32_t n = 1; n <= 16; n++) { 17532 for (uint32_t m = 1; m <= 2; m++) { 17533 GemmMicrokernelTester() 17534 .mr(2) 17535 .nr(16) 17536 .kr(1) 17537 .sr(1) 17538 .m(m) 17539 .n(n) 17540 .k(k) 17541 .iterations(1) 17542 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17543 } 17544 } 17545 } 17546 } 
17547 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_div_8)17548 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8) { 17549 TEST_REQUIRES_ARM_NEON; 17550 for (size_t k = 16; k <= 80; k += 8) { 17551 GemmMicrokernelTester() 17552 .mr(2) 17553 .nr(16) 17554 .kr(1) 17555 .sr(1) 17556 .m(2) 17557 .n(16) 17558 .k(k) 17559 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17560 } 17561 } 17562 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_div_8_strided_a)17563 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) { 17564 TEST_REQUIRES_ARM_NEON; 17565 for (size_t k = 16; k <= 80; k += 8) { 17566 GemmMicrokernelTester() 17567 .mr(2) 17568 .nr(16) 17569 .kr(1) 17570 .sr(1) 17571 .m(2) 17572 .n(16) 17573 .k(k) 17574 .a_stride(83) 17575 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17576 } 17577 } 17578 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_div_8_subtile)17579 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) { 17580 TEST_REQUIRES_ARM_NEON; 17581 for (size_t k = 16; k <= 80; k += 8) { 17582 for (uint32_t n = 1; n <= 16; n++) { 17583 for (uint32_t m = 1; m <= 2; m++) { 17584 GemmMicrokernelTester() 17585 .mr(2) 17586 .nr(16) 17587 .kr(1) 17588 .sr(1) 17589 .m(m) 17590 .n(n) 17591 .k(k) 17592 .iterations(1) 17593 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17594 } 17595 } 17596 } 17597 } 17598 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_gt_16)17599 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) { 17600 TEST_REQUIRES_ARM_NEON; 17601 for (uint32_t n = 17; n < 32; n++) { 17602 for (size_t k = 1; k <= 40; k += 9) { 17603 GemmMicrokernelTester() 17604 .mr(2) 17605 .nr(16) 17606 .kr(1) 17607 .sr(1) 17608 
.m(2) 17609 .n(n) 17610 .k(k) 17611 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17612 } 17613 } 17614 } 17615 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_gt_16_strided_cn)17616 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) { 17617 TEST_REQUIRES_ARM_NEON; 17618 for (uint32_t n = 17; n < 32; n++) { 17619 for (size_t k = 1; k <= 40; k += 9) { 17620 GemmMicrokernelTester() 17621 .mr(2) 17622 .nr(16) 17623 .kr(1) 17624 .sr(1) 17625 .m(2) 17626 .n(n) 17627 .k(k) 17628 .cn_stride(19) 17629 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17630 } 17631 } 17632 } 17633 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_gt_16_strided_a)17634 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) { 17635 TEST_REQUIRES_ARM_NEON; 17636 for (uint32_t n = 17; n < 32; n++) { 17637 for (size_t k = 1; k <= 40; k += 9) { 17638 GemmMicrokernelTester() 17639 .mr(2) 17640 .nr(16) 17641 .kr(1) 17642 .sr(1) 17643 .m(2) 17644 .n(n) 17645 .k(k) 17646 .a_stride(43) 17647 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17648 } 17649 } 17650 } 17651 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_gt_16_subtile)17652 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) { 17653 TEST_REQUIRES_ARM_NEON; 17654 for (uint32_t n = 17; n < 32; n++) { 17655 for (size_t k = 1; k <= 40; k += 9) { 17656 for (uint32_t m = 1; m <= 2; m++) { 17657 GemmMicrokernelTester() 17658 .mr(2) 17659 .nr(16) 17660 .kr(1) 17661 .sr(1) 17662 .m(m) 17663 .n(n) 17664 .k(k) 17665 .iterations(1) 17666 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17667 } 17668 } 17669 } 17670 } 17671 
// 2x16 NEON MLAL-lane PRFM kernel: tests with n a multiple of 16, a
// cm_stride subtile sweep, and the qmin/qmax settings. Every test configures
// the tester with mr=2, nr=16, kr=1, sr=1 and is gated on
// TEST_REQUIRES_ARM_NEON.

// n = 32 and 48, k = 1, 10, 19, 28, 37.
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_div_16, over every m in [1, 2], with iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Every m in [1, 2] and n in [1, 16] for k = 1, 10, 19, 28, 37, with
// cm_stride(19) and iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x16 tile at k = 8 with qmin(128).
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full 2x16 tile at k = 8 with qmax(128).
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cm) { 17795 TEST_REQUIRES_ARM_NEON; 17796 GemmMicrokernelTester() 17797 .mr(2) 17798 .nr(16) 17799 .kr(1) 17800 .sr(1) 17801 .m(2) 17802 .n(16) 17803 .k(8) 17804 .cm_stride(19) 17805 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17806 } 17807 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 17808 17809 17810 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_eq_8)17811 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_eq_8) { 17812 TEST_REQUIRES_ARM_NEON; 17813 GemmMicrokernelTester() 17814 .mr(3) 17815 .nr(16) 17816 .kr(1) 17817 .sr(1) 17818 .m(3) 17819 .n(16) 17820 .k(8) 17821 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17822 } 17823 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,strided_cn)17824 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, strided_cn) { 17825 TEST_REQUIRES_ARM_NEON; 17826 GemmMicrokernelTester() 17827 .mr(3) 17828 .nr(16) 17829 .kr(1) 17830 .sr(1) 17831 .m(3) 17832 .n(16) 17833 .k(8) 17834 .cn_stride(19) 17835 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17836 } 17837 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_eq_8_strided_a)17838 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) { 17839 TEST_REQUIRES_ARM_NEON; 17840 GemmMicrokernelTester() 17841 .mr(3) 17842 .nr(16) 17843 .kr(1) 17844 .sr(1) 17845 .m(3) 17846 .n(16) 17847 .k(8) 17848 .a_stride(11) 17849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17850 } 17851 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile)17852 
// 3x16 NEON MLAL-lane PRFM kernel: subtile, k-sweep and n-sweep tests. Every
// test configures the tester with mr=3, nr=16, kr=1, sr=1 and is gated on
// TEST_REQUIRES_ARM_NEON.

// k = 8, every m in [1, 3] and n in [1, 16], with iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 8, n = 16, every m in [1, 3], with iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 8, m = 3, every n in [1, 16], with iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 3x16 tile, k in [1, 7].
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same k sweep as k_lt_8, with a_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .a_stride(11)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in [1, 7], every m in [1, 3] and n in [1, 16], with iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 3x16 tile, k in [9, 15].
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same k sweep as k_gt_8, with a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in [9, 15], every m in [1, 3] and n in [1, 16], with iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 3x16 tile, k = 16, 24, ..., 80.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same k sweep as k_div_8, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80, every m in [1, 3] and n in [1, 16], with iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [17, 31] (above nr, below 2*nr), k = 1, 10, 19, 28, 37.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_16, over every m in [1, 3], with iterations(1).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48, k = 1, 10, 19, 28, 37.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,n_div_16_strided_a)18163 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) { 18164 TEST_REQUIRES_ARM_NEON; 18165 for (uint32_t n = 32; n <= 48; n += 16) { 18166 for (size_t k = 1; k <= 40; k += 9) { 18167 GemmMicrokernelTester() 18168 .mr(3) 18169 .nr(16) 18170 .kr(1) 18171 .sr(1) 18172 .m(3) 18173 .n(n) 18174 .k(k) 18175 .a_stride(43) 18176 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18177 } 18178 } 18179 } 18180 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,n_div_16_subtile)18181 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) { 18182 TEST_REQUIRES_ARM_NEON; 18183 for (uint32_t n = 32; n <= 48; n += 16) { 18184 for (size_t k = 1; k <= 40; k += 9) { 18185 for (uint32_t m = 1; m <= 3; m++) { 18186 GemmMicrokernelTester() 18187 .mr(3) 18188 .nr(16) 18189 .kr(1) 18190 .sr(1) 18191 .m(m) 18192 .n(n) 18193 .k(k) 18194 .iterations(1) 18195 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18196 } 18197 } 18198 } 18199 } 18200 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,strided_cm_subtile)18201 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) { 18202 TEST_REQUIRES_ARM_NEON; 18203 for (size_t k = 1; k <= 40; k += 9) { 18204 for (uint32_t n = 1; n <= 16; n++) { 18205 for (uint32_t m = 1; m <= 3; m++) { 18206 GemmMicrokernelTester() 18207 .mr(3) 18208 .nr(16) 18209 .kr(1) 18210 .sr(1) 18211 .m(m) 18212 .n(n) 18213 .k(k) 18214 .cm_stride(19) 18215 .iterations(1) 18216 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18217 } 18218 } 18219 } 18220 } 18221 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,qmin)18222 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, 
qmin) { 18223 TEST_REQUIRES_ARM_NEON; 18224 GemmMicrokernelTester() 18225 .mr(3) 18226 .nr(16) 18227 .kr(1) 18228 .sr(1) 18229 .m(3) 18230 .n(16) 18231 .k(8) 18232 .qmin(128) 18233 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18234 } 18235 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,qmax)18236 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, qmax) { 18237 TEST_REQUIRES_ARM_NEON; 18238 GemmMicrokernelTester() 18239 .mr(3) 18240 .nr(16) 18241 .kr(1) 18242 .sr(1) 18243 .m(3) 18244 .n(16) 18245 .k(8) 18246 .qmax(128) 18247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18248 } 18249 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,strided_cm)18250 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, strided_cm) { 18251 TEST_REQUIRES_ARM_NEON; 18252 GemmMicrokernelTester() 18253 .mr(3) 18254 .nr(16) 18255 .kr(1) 18256 .sr(1) 18257 .m(3) 18258 .n(16) 18259 .k(8) 18260 .cm_stride(19) 18261 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18262 } 18263 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 18264 18265 18266 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_eq_8)18267 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8) { 18268 TEST_REQUIRES_ARM_NEON; 18269 GemmMicrokernelTester() 18270 .mr(3) 18271 .nr(8) 18272 .kr(1) 18273 .sr(1) 18274 .m(3) 18275 .n(8) 18276 .k(8) 18277 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18278 } 18279 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,strided_cn)18280 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cn) { 18281 TEST_REQUIRES_ARM_NEON; 18282 GemmMicrokernelTester() 18283 .mr(3) 18284 .nr(8) 
18285 .kr(1) 18286 .sr(1) 18287 .m(3) 18288 .n(8) 18289 .k(8) 18290 .cn_stride(11) 18291 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18292 } 18293 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_eq_8_strided_a)18294 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) { 18295 TEST_REQUIRES_ARM_NEON; 18296 GemmMicrokernelTester() 18297 .mr(3) 18298 .nr(8) 18299 .kr(1) 18300 .sr(1) 18301 .m(3) 18302 .n(8) 18303 .k(8) 18304 .a_stride(11) 18305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18306 } 18307 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_eq_8_subtile)18308 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) { 18309 TEST_REQUIRES_ARM_NEON; 18310 for (uint32_t n = 1; n <= 8; n++) { 18311 for (uint32_t m = 1; m <= 3; m++) { 18312 GemmMicrokernelTester() 18313 .mr(3) 18314 .nr(8) 18315 .kr(1) 18316 .sr(1) 18317 .m(m) 18318 .n(n) 18319 .k(8) 18320 .iterations(1) 18321 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18322 } 18323 } 18324 } 18325 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_eq_8_subtile_m)18326 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) { 18327 TEST_REQUIRES_ARM_NEON; 18328 for (uint32_t m = 1; m <= 3; m++) { 18329 GemmMicrokernelTester() 18330 .mr(3) 18331 .nr(8) 18332 .kr(1) 18333 .sr(1) 18334 .m(m) 18335 .n(8) 18336 .k(8) 18337 .iterations(1) 18338 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18339 } 18340 } 18341 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_eq_8_subtile_n)18342 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) { 18343 TEST_REQUIRES_ARM_NEON; 18344 for (uint32_t n = 1; 
n <= 8; n++) { 18345 GemmMicrokernelTester() 18346 .mr(3) 18347 .nr(8) 18348 .kr(1) 18349 .sr(1) 18350 .m(3) 18351 .n(n) 18352 .k(8) 18353 .iterations(1) 18354 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18355 } 18356 } 18357 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_lt_8)18358 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8) { 18359 TEST_REQUIRES_ARM_NEON; 18360 for (size_t k = 1; k < 8; k++) { 18361 GemmMicrokernelTester() 18362 .mr(3) 18363 .nr(8) 18364 .kr(1) 18365 .sr(1) 18366 .m(3) 18367 .n(8) 18368 .k(k) 18369 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18370 } 18371 } 18372 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_lt_8_strided_a)18373 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) { 18374 TEST_REQUIRES_ARM_NEON; 18375 for (size_t k = 1; k < 8; k++) { 18376 GemmMicrokernelTester() 18377 .mr(3) 18378 .nr(8) 18379 .kr(1) 18380 .sr(1) 18381 .m(3) 18382 .n(8) 18383 .k(k) 18384 .a_stride(11) 18385 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18386 } 18387 } 18388 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_lt_8_subtile)18389 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) { 18390 TEST_REQUIRES_ARM_NEON; 18391 for (size_t k = 1; k < 8; k++) { 18392 for (uint32_t n = 1; n <= 8; n++) { 18393 for (uint32_t m = 1; m <= 3; m++) { 18394 GemmMicrokernelTester() 18395 .mr(3) 18396 .nr(8) 18397 .kr(1) 18398 .sr(1) 18399 .m(m) 18400 .n(n) 18401 .k(k) 18402 .iterations(1) 18403 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18404 } 18405 } 18406 } 18407 } 18408 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_gt_8)18409 
TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8) { 18410 TEST_REQUIRES_ARM_NEON; 18411 for (size_t k = 9; k < 16; k++) { 18412 GemmMicrokernelTester() 18413 .mr(3) 18414 .nr(8) 18415 .kr(1) 18416 .sr(1) 18417 .m(3) 18418 .n(8) 18419 .k(k) 18420 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18421 } 18422 } 18423 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_gt_8_strided_a)18424 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) { 18425 TEST_REQUIRES_ARM_NEON; 18426 for (size_t k = 9; k < 16; k++) { 18427 GemmMicrokernelTester() 18428 .mr(3) 18429 .nr(8) 18430 .kr(1) 18431 .sr(1) 18432 .m(3) 18433 .n(8) 18434 .k(k) 18435 .a_stride(19) 18436 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18437 } 18438 } 18439 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_gt_8_subtile)18440 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) { 18441 TEST_REQUIRES_ARM_NEON; 18442 for (size_t k = 9; k < 16; k++) { 18443 for (uint32_t n = 1; n <= 8; n++) { 18444 for (uint32_t m = 1; m <= 3; m++) { 18445 GemmMicrokernelTester() 18446 .mr(3) 18447 .nr(8) 18448 .kr(1) 18449 .sr(1) 18450 .m(m) 18451 .n(n) 18452 .k(k) 18453 .iterations(1) 18454 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18455 } 18456 } 18457 } 18458 } 18459 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_div_8)18460 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8) { 18461 TEST_REQUIRES_ARM_NEON; 18462 for (size_t k = 16; k <= 80; k += 8) { 18463 GemmMicrokernelTester() 18464 .mr(3) 18465 .nr(8) 18466 .kr(1) 18467 .sr(1) 18468 .m(3) 18469 .n(8) 18470 .k(k) 18471 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 18472 } 18473 } 18474 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_div_8_strided_a)18475 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) { 18476 TEST_REQUIRES_ARM_NEON; 18477 for (size_t k = 16; k <= 80; k += 8) { 18478 GemmMicrokernelTester() 18479 .mr(3) 18480 .nr(8) 18481 .kr(1) 18482 .sr(1) 18483 .m(3) 18484 .n(8) 18485 .k(k) 18486 .a_stride(83) 18487 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18488 } 18489 } 18490 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_div_8_subtile)18491 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) { 18492 TEST_REQUIRES_ARM_NEON; 18493 for (size_t k = 16; k <= 80; k += 8) { 18494 for (uint32_t n = 1; n <= 8; n++) { 18495 for (uint32_t m = 1; m <= 3; m++) { 18496 GemmMicrokernelTester() 18497 .mr(3) 18498 .nr(8) 18499 .kr(1) 18500 .sr(1) 18501 .m(m) 18502 .n(n) 18503 .k(k) 18504 .iterations(1) 18505 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18506 } 18507 } 18508 } 18509 } 18510 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_gt_8)18511 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8) { 18512 TEST_REQUIRES_ARM_NEON; 18513 for (uint32_t n = 9; n < 16; n++) { 18514 for (size_t k = 1; k <= 40; k += 9) { 18515 GemmMicrokernelTester() 18516 .mr(3) 18517 .nr(8) 18518 .kr(1) 18519 .sr(1) 18520 .m(3) 18521 .n(n) 18522 .k(k) 18523 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18524 } 18525 } 18526 } 18527 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_gt_8_strided_cn)18528 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) { 18529 TEST_REQUIRES_ARM_NEON; 18530 for (uint32_t n = 9; n < 16; n++) { 18531 for (size_t k = 1; k <= 40; k += 9) { 18532 
GemmMicrokernelTester() 18533 .mr(3) 18534 .nr(8) 18535 .kr(1) 18536 .sr(1) 18537 .m(3) 18538 .n(n) 18539 .k(k) 18540 .cn_stride(11) 18541 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18542 } 18543 } 18544 } 18545 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_gt_8_strided_a)18546 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) { 18547 TEST_REQUIRES_ARM_NEON; 18548 for (uint32_t n = 9; n < 16; n++) { 18549 for (size_t k = 1; k <= 40; k += 9) { 18550 GemmMicrokernelTester() 18551 .mr(3) 18552 .nr(8) 18553 .kr(1) 18554 .sr(1) 18555 .m(3) 18556 .n(n) 18557 .k(k) 18558 .a_stride(43) 18559 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18560 } 18561 } 18562 } 18563 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_gt_8_subtile)18564 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) { 18565 TEST_REQUIRES_ARM_NEON; 18566 for (uint32_t n = 9; n < 16; n++) { 18567 for (size_t k = 1; k <= 40; k += 9) { 18568 for (uint32_t m = 1; m <= 3; m++) { 18569 GemmMicrokernelTester() 18570 .mr(3) 18571 .nr(8) 18572 .kr(1) 18573 .sr(1) 18574 .m(m) 18575 .n(n) 18576 .k(k) 18577 .iterations(1) 18578 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18579 } 18580 } 18581 } 18582 } 18583 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_div_8)18584 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8) { 18585 TEST_REQUIRES_ARM_NEON; 18586 for (uint32_t n = 16; n <= 24; n += 8) { 18587 for (size_t k = 1; k <= 40; k += 9) { 18588 GemmMicrokernelTester() 18589 .mr(3) 18590 .nr(8) 18591 .kr(1) 18592 .sr(1) 18593 .m(3) 18594 .n(n) 18595 .k(k) 18596 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 18597 } 18598 } 18599 } 18600 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_div_8_strided_cn)18601 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) { 18602 TEST_REQUIRES_ARM_NEON; 18603 for (uint32_t n = 16; n <= 24; n += 8) { 18604 for (size_t k = 1; k <= 40; k += 9) { 18605 GemmMicrokernelTester() 18606 .mr(3) 18607 .nr(8) 18608 .kr(1) 18609 .sr(1) 18610 .m(3) 18611 .n(n) 18612 .k(k) 18613 .cn_stride(11) 18614 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18615 } 18616 } 18617 } 18618 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_div_8_strided_a)18619 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) { 18620 TEST_REQUIRES_ARM_NEON; 18621 for (uint32_t n = 16; n <= 24; n += 8) { 18622 for (size_t k = 1; k <= 40; k += 9) { 18623 GemmMicrokernelTester() 18624 .mr(3) 18625 .nr(8) 18626 .kr(1) 18627 .sr(1) 18628 .m(3) 18629 .n(n) 18630 .k(k) 18631 .a_stride(43) 18632 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18633 } 18634 } 18635 } 18636 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_div_8_subtile)18637 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) { 18638 TEST_REQUIRES_ARM_NEON; 18639 for (uint32_t n = 16; n <= 24; n += 8) { 18640 for (size_t k = 1; k <= 40; k += 9) { 18641 for (uint32_t m = 1; m <= 3; m++) { 18642 GemmMicrokernelTester() 18643 .mr(3) 18644 .nr(8) 18645 .kr(1) 18646 .sr(1) 18647 .m(m) 18648 .n(n) 18649 .k(k) 18650 .iterations(1) 18651 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18652 } 18653 } 18654 } 18655 } 18656 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,strided_cm_subtile)18657 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) { 18658 
TEST_REQUIRES_ARM_NEON; 18659 for (size_t k = 1; k <= 40; k += 9) { 18660 for (uint32_t n = 1; n <= 8; n++) { 18661 for (uint32_t m = 1; m <= 3; m++) { 18662 GemmMicrokernelTester() 18663 .mr(3) 18664 .nr(8) 18665 .kr(1) 18666 .sr(1) 18667 .m(m) 18668 .n(n) 18669 .k(k) 18670 .cm_stride(11) 18671 .iterations(1) 18672 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18673 } 18674 } 18675 } 18676 } 18677 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,qmin)18678 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmin) { 18679 TEST_REQUIRES_ARM_NEON; 18680 GemmMicrokernelTester() 18681 .mr(3) 18682 .nr(8) 18683 .kr(1) 18684 .sr(1) 18685 .m(3) 18686 .n(8) 18687 .k(8) 18688 .qmin(128) 18689 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18690 } 18691 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,qmax)18692 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmax) { 18693 TEST_REQUIRES_ARM_NEON; 18694 GemmMicrokernelTester() 18695 .mr(3) 18696 .nr(8) 18697 .kr(1) 18698 .sr(1) 18699 .m(3) 18700 .n(8) 18701 .k(8) 18702 .qmax(128) 18703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18704 } 18705 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,strided_cm)18706 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm) { 18707 TEST_REQUIRES_ARM_NEON; 18708 GemmMicrokernelTester() 18709 .mr(3) 18710 .nr(8) 18711 .kr(1) 18712 .sr(1) 18713 .m(3) 18714 .n(8) 18715 .k(8) 18716 .cm_stride(11) 18717 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18718 } 18719 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 18720